1*3f1979aaSAndroid Build Coastguard Worker /* 2*3f1979aaSAndroid Build Coastguard Worker Copyright (c) 2013 Julien Pommier. 3*3f1979aaSAndroid Build Coastguard Worker Copyright (c) 2019 Hayati Ayguen ( [email protected] ) 4*3f1979aaSAndroid Build Coastguard Worker */ 5*3f1979aaSAndroid Build Coastguard Worker 6*3f1979aaSAndroid Build Coastguard Worker #define _WANT_SNAN 1 7*3f1979aaSAndroid Build Coastguard Worker 8*3f1979aaSAndroid Build Coastguard Worker #include "pffft.h" 9*3f1979aaSAndroid Build Coastguard Worker #include "pffastconv.h" 10*3f1979aaSAndroid Build Coastguard Worker 11*3f1979aaSAndroid Build Coastguard Worker #include <math.h> 12*3f1979aaSAndroid Build Coastguard Worker #include <float.h> 13*3f1979aaSAndroid Build Coastguard Worker #include <limits.h> 14*3f1979aaSAndroid Build Coastguard Worker #include <inttypes.h> 15*3f1979aaSAndroid Build Coastguard Worker #include <stdio.h> 16*3f1979aaSAndroid Build Coastguard Worker #include <stdlib.h> 17*3f1979aaSAndroid Build Coastguard Worker #include <time.h> 18*3f1979aaSAndroid Build Coastguard Worker #include <assert.h> 19*3f1979aaSAndroid Build Coastguard Worker #include <string.h> 20*3f1979aaSAndroid Build Coastguard Worker 21*3f1979aaSAndroid Build Coastguard Worker #ifdef HAVE_SYS_TIMES 22*3f1979aaSAndroid Build Coastguard Worker # include <sys/times.h> 23*3f1979aaSAndroid Build Coastguard Worker # include <unistd.h> 24*3f1979aaSAndroid Build Coastguard Worker #endif 25*3f1979aaSAndroid Build Coastguard Worker 26*3f1979aaSAndroid Build Coastguard Worker /* 27*3f1979aaSAndroid Build Coastguard Worker vector support macros: the rest of the code is independant of 28*3f1979aaSAndroid Build Coastguard Worker SSE/Altivec/NEON -- adding support for other platforms with 4-element 29*3f1979aaSAndroid Build Coastguard Worker vectors should be limited to these macros 30*3f1979aaSAndroid Build Coastguard Worker */ 31*3f1979aaSAndroid Build Coastguard Worker #if 0 32*3f1979aaSAndroid Build Coastguard Worker #include "simd/pf_float.h" 33*3f1979aaSAndroid Build Coastguard Worker #endif 34*3f1979aaSAndroid Build Coastguard Worker 35*3f1979aaSAndroid Build Coastguard Worker #if defined(_MSC_VER) 36*3f1979aaSAndroid Build Coastguard Worker # define RESTRICT __restrict 37*3f1979aaSAndroid Build Coastguard Worker #elif defined(__GNUC__) 38*3f1979aaSAndroid Build Coastguard Worker # define RESTRICT __restrict 39*3f1979aaSAndroid Build Coastguard Worker #else 40*3f1979aaSAndroid Build Coastguard Worker # define RESTRICT 41*3f1979aaSAndroid Build Coastguard Worker #endif 42*3f1979aaSAndroid Build Coastguard Worker 43*3f1979aaSAndroid Build Coastguard Worker 44*3f1979aaSAndroid Build Coastguard Worker #if defined(_MSC_VER) 45*3f1979aaSAndroid Build Coastguard Worker #pragma warning( disable : 4244 ) 46*3f1979aaSAndroid Build Coastguard Worker #endif 47*3f1979aaSAndroid Build Coastguard Worker 48*3f1979aaSAndroid Build Coastguard Worker 49*3f1979aaSAndroid Build Coastguard Worker #ifdef SNANF 50*3f1979aaSAndroid Build Coastguard Worker #define INVALID_FLOAT_VAL SNANF 51*3f1979aaSAndroid Build Coastguard Worker #elif defined(SNAN) 52*3f1979aaSAndroid Build Coastguard Worker #define INVALID_FLOAT_VAL SNAN 53*3f1979aaSAndroid Build Coastguard Worker #elif defined(NAN) 54*3f1979aaSAndroid Build Coastguard Worker #define INVALID_FLOAT_VAL NAN 55*3f1979aaSAndroid Build Coastguard Worker #elif defined(INFINITY) 56*3f1979aaSAndroid Build Coastguard Worker #define INVALID_FLOAT_VAL INFINITY 57*3f1979aaSAndroid Build Coastguard Worker #else 58*3f1979aaSAndroid Build Coastguard Worker #define INVALID_FLOAT_VAL FLT_MAX 59*3f1979aaSAndroid Build Coastguard Worker #endif 60*3f1979aaSAndroid Build Coastguard Worker 61*3f1979aaSAndroid Build Coastguard Worker 62*3f1979aaSAndroid Build Coastguard Worker #if defined(HAVE_SYS_TIMES) uclock_sec(void)63*3f1979aaSAndroid Build Coastguard Worker inline double uclock_sec(void) { 64*3f1979aaSAndroid Build Coastguard Worker static double ttclk = 0.; 65*3f1979aaSAndroid Build Coastguard Worker struct tms t; 66*3f1979aaSAndroid Build Coastguard Worker if (ttclk == 0.) 67*3f1979aaSAndroid Build Coastguard Worker ttclk = sysconf(_SC_CLK_TCK); 68*3f1979aaSAndroid Build Coastguard Worker times(&t); 69*3f1979aaSAndroid Build Coastguard Worker /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */ 70*3f1979aaSAndroid Build Coastguard Worker return ((double)t.tms_utime)) / ttclk; 71*3f1979aaSAndroid Build Coastguard Worker } 72*3f1979aaSAndroid Build Coastguard Worker # else 73*3f1979aaSAndroid Build Coastguard Worker double uclock_sec(void) 74*3f1979aaSAndroid Build Coastguard Worker { return (double)clock()/(double)CLOCKS_PER_SEC; } 75*3f1979aaSAndroid Build Coastguard Worker #endif 76*3f1979aaSAndroid Build Coastguard Worker 77*3f1979aaSAndroid Build Coastguard Worker 78*3f1979aaSAndroid Build Coastguard Worker 79*3f1979aaSAndroid Build Coastguard Worker typedef int (*pfnConvolution) (void * setup, const float * X, int len, float *Y, const float *Yref, int applyFlush); 80*3f1979aaSAndroid Build Coastguard Worker typedef void* (*pfnConvSetup) (float *Hfwd, int Nf, int * BlkLen, int flags); 81*3f1979aaSAndroid Build Coastguard Worker typedef pfnConvolution (*pfnGetConvFnPtr) (void * setup); 82*3f1979aaSAndroid Build Coastguard Worker typedef void (*pfnConvDestroy) (void * setup); 83*3f1979aaSAndroid Build Coastguard Worker 84*3f1979aaSAndroid Build Coastguard Worker 85*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup 86*3f1979aaSAndroid Build Coastguard Worker { 87*3f1979aaSAndroid Build Coastguard Worker pfnConvolution pfn; 88*3f1979aaSAndroid Build Coastguard Worker int N; 89*3f1979aaSAndroid Build Coastguard Worker int B; 90*3f1979aaSAndroid Build Coastguard Worker float * H; 91*3f1979aaSAndroid Build Coastguard Worker int flags; 92*3f1979aaSAndroid Build Coastguard Worker }; 93*3f1979aaSAndroid Build Coastguard Worker 94*3f1979aaSAndroid Build Coastguard Worker 95*3f1979aaSAndroid Build Coastguard Worker void * convSetupRev( float * H, int N, int * BlkLen, int flags ) 96*3f1979aaSAndroid Build Coastguard Worker { 97*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * s = pffastconv_malloc( sizeof(struct ConvSetup) ); 98*3f1979aaSAndroid Build Coastguard Worker int i, Nr = N; 99*3f1979aaSAndroid Build Coastguard Worker if (flags & PFFASTCONV_CPLX_INP_OUT) 100*3f1979aaSAndroid Build Coastguard Worker Nr *= 2; 101*3f1979aaSAndroid Build Coastguard Worker Nr += 4; 102*3f1979aaSAndroid Build Coastguard Worker s->pfn = NULL; 103*3f1979aaSAndroid Build Coastguard Worker s->N = N; 104*3f1979aaSAndroid Build Coastguard Worker s->B = *BlkLen; 105*3f1979aaSAndroid Build Coastguard Worker s->H = pffastconv_malloc((unsigned)Nr * sizeof(float)); 106*3f1979aaSAndroid Build Coastguard Worker s->flags = flags; 107*3f1979aaSAndroid Build Coastguard Worker memset(s->H, 0, (unsigned)Nr * sizeof(float)); 108*3f1979aaSAndroid Build Coastguard Worker if (flags & PFFASTCONV_CPLX_INP_OUT) 109*3f1979aaSAndroid Build Coastguard Worker { 110*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < N; ++i ) { 111*3f1979aaSAndroid Build Coastguard Worker s->H[2*(N-1 -i) ] = H[i]; 112*3f1979aaSAndroid Build Coastguard Worker s->H[2*(N-1 -i)+1] = H[i]; 113*3f1979aaSAndroid Build Coastguard Worker } 114*3f1979aaSAndroid Build Coastguard Worker /* simpler detection of overruns */ 115*3f1979aaSAndroid Build Coastguard Worker s->H[ 2*N ] = INVALID_FLOAT_VAL; 116*3f1979aaSAndroid Build Coastguard Worker s->H[ 2*N +1 ] = INVALID_FLOAT_VAL; 117*3f1979aaSAndroid Build Coastguard Worker s->H[ 2*N +2 ] = INVALID_FLOAT_VAL; 118*3f1979aaSAndroid Build Coastguard Worker s->H[ 2*N +3 ] = INVALID_FLOAT_VAL; 119*3f1979aaSAndroid Build Coastguard Worker } 120*3f1979aaSAndroid Build Coastguard Worker else 121*3f1979aaSAndroid Build Coastguard Worker { 122*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < N; ++i ) 123*3f1979aaSAndroid Build Coastguard Worker s->H[ N-1 -i ] = H[i]; 124*3f1979aaSAndroid Build Coastguard Worker /* simpler detection of overruns */ 125*3f1979aaSAndroid Build Coastguard Worker s->H[ N ] = INVALID_FLOAT_VAL; 126*3f1979aaSAndroid Build Coastguard Worker s->H[ N +1 ] = INVALID_FLOAT_VAL; 127*3f1979aaSAndroid Build Coastguard Worker s->H[ N +2 ] = INVALID_FLOAT_VAL; 128*3f1979aaSAndroid Build Coastguard Worker s->H[ N +3 ] = INVALID_FLOAT_VAL; 129*3f1979aaSAndroid Build Coastguard Worker } 130*3f1979aaSAndroid Build Coastguard Worker return s; 131*3f1979aaSAndroid Build Coastguard Worker } 132*3f1979aaSAndroid Build Coastguard Worker 133*3f1979aaSAndroid Build Coastguard Worker void convDestroyRev( void * setup ) 134*3f1979aaSAndroid Build Coastguard Worker { 135*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * s = (struct ConvSetup*)setup; 136*3f1979aaSAndroid Build Coastguard Worker pffastconv_free(s->H); 137*3f1979aaSAndroid Build Coastguard Worker pffastconv_free(setup); 138*3f1979aaSAndroid Build Coastguard Worker } 139*3f1979aaSAndroid Build Coastguard Worker 140*3f1979aaSAndroid Build Coastguard Worker 141*3f1979aaSAndroid Build Coastguard Worker pfnConvolution ConvGetFnPtrRev( void * setup ) 142*3f1979aaSAndroid Build Coastguard Worker { 143*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * s = (struct ConvSetup*)setup; 144*3f1979aaSAndroid Build Coastguard Worker if (!s) 145*3f1979aaSAndroid Build Coastguard Worker return NULL; 146*3f1979aaSAndroid Build Coastguard Worker return s->pfn; 147*3f1979aaSAndroid Build Coastguard Worker } 148*3f1979aaSAndroid Build Coastguard Worker 149*3f1979aaSAndroid Build Coastguard Worker 150*3f1979aaSAndroid Build Coastguard Worker void convSimdDestroy( void * setup ) 151*3f1979aaSAndroid Build Coastguard Worker { 152*3f1979aaSAndroid Build Coastguard Worker convDestroyRev(setup); 153*3f1979aaSAndroid Build Coastguard Worker } 154*3f1979aaSAndroid Build Coastguard Worker 155*3f1979aaSAndroid Build Coastguard Worker 156*3f1979aaSAndroid Build Coastguard Worker void * fastConvSetup( float * H, int N, int * BlkLen, int flags ) 157*3f1979aaSAndroid Build Coastguard Worker { 158*3f1979aaSAndroid Build Coastguard Worker void * p = pffastconv_new_setup( H, N, BlkLen, flags ); 159*3f1979aaSAndroid Build Coastguard Worker if (!p) 160*3f1979aaSAndroid Build Coastguard Worker printf("fastConvSetup(N = %d, *BlkLen = %d, flags = %d) = NULL\n", N, *BlkLen, flags); 161*3f1979aaSAndroid Build Coastguard Worker return p; 162*3f1979aaSAndroid Build Coastguard Worker } 163*3f1979aaSAndroid Build Coastguard Worker 164*3f1979aaSAndroid Build Coastguard Worker 165*3f1979aaSAndroid Build Coastguard Worker void fastConvDestroy( void * setup ) 166*3f1979aaSAndroid Build Coastguard Worker { 167*3f1979aaSAndroid Build Coastguard Worker pffastconv_destroy_setup( (PFFASTCONV_Setup*)setup ); 168*3f1979aaSAndroid Build Coastguard Worker } 169*3f1979aaSAndroid Build Coastguard Worker 170*3f1979aaSAndroid Build Coastguard Worker 171*3f1979aaSAndroid Build Coastguard Worker 172*3f1979aaSAndroid Build Coastguard Worker int slow_conv_R(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) 173*3f1979aaSAndroid Build Coastguard Worker { 174*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * p = (struct ConvSetup*)setup; 175*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT X = input; 176*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT Hrev = p->H; 177*3f1979aaSAndroid Build Coastguard Worker float * RESTRICT Y = output; 178*3f1979aaSAndroid Build Coastguard Worker const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; 179*3f1979aaSAndroid Build Coastguard Worker const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); 180*3f1979aaSAndroid Build Coastguard Worker int i, j; 181*3f1979aaSAndroid Build Coastguard Worker (void)Yref; 182*3f1979aaSAndroid Build Coastguard Worker (void)applyFlush; 183*3f1979aaSAndroid Build Coastguard Worker 184*3f1979aaSAndroid Build Coastguard Worker if (p->flags & PFFASTCONV_CPLX_INP_OUT) 185*3f1979aaSAndroid Build Coastguard Worker { 186*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; i += 2 ) 187*3f1979aaSAndroid Build Coastguard Worker { 188*3f1979aaSAndroid Build Coastguard Worker float sumRe = 0.0F, sumIm = 0.0F; 189*3f1979aaSAndroid Build Coastguard Worker for ( j = 0; j < Nr; j += 2 ) 190*3f1979aaSAndroid Build Coastguard Worker { 191*3f1979aaSAndroid Build Coastguard Worker sumRe += X[i+j ] * Hrev[j]; 192*3f1979aaSAndroid Build Coastguard Worker sumIm += X[i+j+1] * Hrev[j+1]; 193*3f1979aaSAndroid Build Coastguard Worker } 194*3f1979aaSAndroid Build Coastguard Worker Y[i ] = sumRe; 195*3f1979aaSAndroid Build Coastguard Worker Y[i+1] = sumIm; 196*3f1979aaSAndroid Build Coastguard Worker } 197*3f1979aaSAndroid Build Coastguard Worker return i/2; 198*3f1979aaSAndroid Build Coastguard Worker } 199*3f1979aaSAndroid Build Coastguard Worker else 200*3f1979aaSAndroid Build Coastguard Worker { 201*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 202*3f1979aaSAndroid Build Coastguard Worker { 203*3f1979aaSAndroid Build Coastguard Worker float sum = 0.0F; 204*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < Nr; ++j ) 205*3f1979aaSAndroid Build Coastguard Worker sum += X[i+j] * Hrev[j]; 206*3f1979aaSAndroid Build Coastguard Worker Y[i] = sum; 207*3f1979aaSAndroid Build Coastguard Worker } 208*3f1979aaSAndroid Build Coastguard Worker return i; 209*3f1979aaSAndroid Build Coastguard Worker } 210*3f1979aaSAndroid Build Coastguard Worker } 211*3f1979aaSAndroid Build Coastguard Worker 212*3f1979aaSAndroid Build Coastguard Worker 213*3f1979aaSAndroid Build Coastguard Worker 214*3f1979aaSAndroid Build Coastguard Worker int slow_conv_A(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) 215*3f1979aaSAndroid Build Coastguard Worker { 216*3f1979aaSAndroid Build Coastguard Worker float sum[4]; 217*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * p = (struct ConvSetup*)setup; 218*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT X = input; 219*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT Hrev = p->H; 220*3f1979aaSAndroid Build Coastguard Worker float * RESTRICT Y = output; 221*3f1979aaSAndroid Build Coastguard Worker const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; 222*3f1979aaSAndroid Build Coastguard Worker const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); 223*3f1979aaSAndroid Build Coastguard Worker int i, j; 224*3f1979aaSAndroid Build Coastguard Worker (void)Yref; 225*3f1979aaSAndroid Build Coastguard Worker (void)applyFlush; 226*3f1979aaSAndroid Build Coastguard Worker 227*3f1979aaSAndroid Build Coastguard Worker if (p->flags & PFFASTCONV_CPLX_INP_OUT) 228*3f1979aaSAndroid Build Coastguard Worker { 229*3f1979aaSAndroid Build Coastguard Worker if ( (Nr & 3) == 0 ) 230*3f1979aaSAndroid Build Coastguard Worker { 231*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; i += 2 ) 232*3f1979aaSAndroid Build Coastguard Worker { 233*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 234*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < Nr; j += 4 ) 235*3f1979aaSAndroid Build Coastguard Worker { 236*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 237*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 238*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 239*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 240*3f1979aaSAndroid Build Coastguard Worker } 241*3f1979aaSAndroid Build Coastguard Worker Y[i ] = sum[0] + sum[2]; 242*3f1979aaSAndroid Build Coastguard Worker Y[i+1] = sum[1] + sum[3]; 243*3f1979aaSAndroid Build Coastguard Worker } 244*3f1979aaSAndroid Build Coastguard Worker } 245*3f1979aaSAndroid Build Coastguard Worker else 246*3f1979aaSAndroid Build Coastguard Worker { 247*3f1979aaSAndroid Build Coastguard Worker const int M = Nr & (~3); 248*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; i += 2 ) 249*3f1979aaSAndroid Build Coastguard Worker { 250*3f1979aaSAndroid Build Coastguard Worker float tailSumRe = 0.0F, tailSumIm = 0.0F; 251*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 252*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < M; j += 4 ) 253*3f1979aaSAndroid Build Coastguard Worker { 254*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j ] * Hrev[j ]; 255*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 256*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 257*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 258*3f1979aaSAndroid Build Coastguard Worker } 259*3f1979aaSAndroid Build Coastguard Worker for ( ; j < Nr; j += 2 ) { 260*3f1979aaSAndroid Build Coastguard Worker tailSumRe += X[i+j ] * Hrev[j ]; 261*3f1979aaSAndroid Build Coastguard Worker tailSumIm += X[i+j+1] * Hrev[j+1]; 262*3f1979aaSAndroid Build Coastguard Worker } 263*3f1979aaSAndroid Build Coastguard Worker Y[i ] = ( sum[0] + sum[2] ) + tailSumRe; 264*3f1979aaSAndroid Build Coastguard Worker Y[i+1] = ( sum[1] + sum[3] ) + tailSumIm; 265*3f1979aaSAndroid Build Coastguard Worker } 266*3f1979aaSAndroid Build Coastguard Worker } 267*3f1979aaSAndroid Build Coastguard Worker return i/2; 268*3f1979aaSAndroid Build Coastguard Worker } 269*3f1979aaSAndroid Build Coastguard Worker else 270*3f1979aaSAndroid Build Coastguard Worker { 271*3f1979aaSAndroid Build Coastguard Worker if ( (Nr & 3) == 0 ) 272*3f1979aaSAndroid Build Coastguard Worker { 273*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 274*3f1979aaSAndroid Build Coastguard Worker { 275*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 276*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < Nr; j += 4 ) 277*3f1979aaSAndroid Build Coastguard Worker { 278*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 279*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 280*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 281*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 282*3f1979aaSAndroid Build Coastguard Worker } 283*3f1979aaSAndroid Build Coastguard Worker Y[i] = sum[0] + sum[1] + sum[2] + sum[3]; 284*3f1979aaSAndroid Build Coastguard Worker } 285*3f1979aaSAndroid Build Coastguard Worker return i; 286*3f1979aaSAndroid Build Coastguard Worker } 287*3f1979aaSAndroid Build Coastguard Worker else 288*3f1979aaSAndroid Build Coastguard Worker { 289*3f1979aaSAndroid Build Coastguard Worker const int M = Nr & (~3); 290*3f1979aaSAndroid Build Coastguard Worker /* printf("A: Nr = %d, M = %d, H[M] = %f, H[M+1] = %f, H[M+2] = %f, H[M+3] = %f\n", Nr, M, Hrev[M], Hrev[M+1], Hrev[M+2], Hrev[M+3] ); */ 291*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 292*3f1979aaSAndroid Build Coastguard Worker { 293*3f1979aaSAndroid Build Coastguard Worker float tailSum = 0.0; 294*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 295*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < M; j += 4 ) 296*3f1979aaSAndroid Build Coastguard Worker { 297*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 298*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 299*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 300*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 301*3f1979aaSAndroid Build Coastguard Worker } 302*3f1979aaSAndroid Build Coastguard Worker for ( ; j < Nr; ++j ) 303*3f1979aaSAndroid Build Coastguard Worker tailSum += X[i+j] * Hrev[j]; 304*3f1979aaSAndroid Build Coastguard Worker Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum; 305*3f1979aaSAndroid Build Coastguard Worker } 306*3f1979aaSAndroid Build Coastguard Worker return i; 307*3f1979aaSAndroid Build Coastguard Worker } 308*3f1979aaSAndroid Build Coastguard Worker } 309*3f1979aaSAndroid Build Coastguard Worker } 310*3f1979aaSAndroid Build Coastguard Worker 311*3f1979aaSAndroid Build Coastguard Worker 312*3f1979aaSAndroid Build Coastguard Worker int slow_conv_B(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) 313*3f1979aaSAndroid Build Coastguard Worker { 314*3f1979aaSAndroid Build Coastguard Worker float sum[4]; 315*3f1979aaSAndroid Build Coastguard Worker struct ConvSetup * p = (struct ConvSetup*)setup; 316*3f1979aaSAndroid Build Coastguard Worker (void)Yref; 317*3f1979aaSAndroid Build Coastguard Worker (void)applyFlush; 318*3f1979aaSAndroid Build Coastguard Worker if (p->flags & PFFASTCONV_SYMMETRIC) 319*3f1979aaSAndroid Build Coastguard Worker { 320*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT X = input; 321*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT Hrev = p->H; 322*3f1979aaSAndroid Build Coastguard Worker float * RESTRICT Y = output; 323*3f1979aaSAndroid Build Coastguard Worker const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; 324*3f1979aaSAndroid Build Coastguard Worker const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); 325*3f1979aaSAndroid Build Coastguard Worker const int h = Nr / 2 -4; 326*3f1979aaSAndroid Build Coastguard Worker const int E = Nr -4; 327*3f1979aaSAndroid Build Coastguard Worker int i, j; 328*3f1979aaSAndroid Build Coastguard Worker 329*3f1979aaSAndroid Build Coastguard Worker if (p->flags & PFFASTCONV_CPLX_INP_OUT) 330*3f1979aaSAndroid Build Coastguard Worker { 331*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; i += 2 ) 332*3f1979aaSAndroid Build Coastguard Worker { 333*3f1979aaSAndroid Build Coastguard Worker const int k = i + E; 334*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 335*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j <= h; j += 4 ) 336*3f1979aaSAndroid Build Coastguard Worker { 337*3f1979aaSAndroid Build Coastguard Worker sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+2] ); 338*3f1979aaSAndroid Build Coastguard Worker sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+3] ); 339*3f1979aaSAndroid Build Coastguard Worker sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j ] ); 340*3f1979aaSAndroid Build Coastguard Worker sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j+1] ); 341*3f1979aaSAndroid Build Coastguard Worker } 342*3f1979aaSAndroid Build Coastguard Worker Y[i ] = sum[0] + sum[2]; 343*3f1979aaSAndroid Build Coastguard Worker Y[i+1] = sum[1] + sum[3]; 344*3f1979aaSAndroid Build Coastguard Worker } 345*3f1979aaSAndroid Build Coastguard Worker return i/2; 346*3f1979aaSAndroid Build Coastguard Worker } 347*3f1979aaSAndroid Build Coastguard Worker else 348*3f1979aaSAndroid Build Coastguard Worker { 349*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 350*3f1979aaSAndroid Build Coastguard Worker { 351*3f1979aaSAndroid Build Coastguard Worker const int k = i + E; 352*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 353*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j <= h; j += 4 ) 354*3f1979aaSAndroid Build Coastguard Worker { 355*3f1979aaSAndroid Build Coastguard Worker sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+3] ); 356*3f1979aaSAndroid Build Coastguard Worker sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+2] ); 357*3f1979aaSAndroid Build Coastguard Worker sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j+1] ); 358*3f1979aaSAndroid Build Coastguard Worker sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j ] ); 359*3f1979aaSAndroid Build Coastguard Worker } 360*3f1979aaSAndroid Build Coastguard Worker Y[i] = sum[0] + sum[1] + sum[2] + sum[3]; 361*3f1979aaSAndroid Build Coastguard Worker } 362*3f1979aaSAndroid Build Coastguard Worker return i; 363*3f1979aaSAndroid Build Coastguard Worker } 364*3f1979aaSAndroid Build Coastguard Worker } 365*3f1979aaSAndroid Build Coastguard Worker else 366*3f1979aaSAndroid Build Coastguard Worker { 367*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT X = input; 368*3f1979aaSAndroid Build Coastguard Worker const float * RESTRICT Hrev = p->H; 369*3f1979aaSAndroid Build Coastguard Worker float * RESTRICT Y = output; 370*3f1979aaSAndroid Build Coastguard Worker const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; 371*3f1979aaSAndroid Build Coastguard Worker const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); 372*3f1979aaSAndroid Build Coastguard Worker int i, j; 373*3f1979aaSAndroid Build Coastguard Worker 374*3f1979aaSAndroid Build Coastguard Worker if (p->flags & PFFASTCONV_CPLX_INP_OUT) 375*3f1979aaSAndroid Build Coastguard Worker { 376*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; i += 2 ) 377*3f1979aaSAndroid Build Coastguard Worker { 378*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 379*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < Nr; j += 4 ) 380*3f1979aaSAndroid Build Coastguard Worker { 381*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 382*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 383*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 384*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 385*3f1979aaSAndroid Build Coastguard Worker } 386*3f1979aaSAndroid Build Coastguard Worker Y[i ] = sum[0] + sum[2]; 387*3f1979aaSAndroid Build Coastguard Worker Y[i+1] = sum[1] + sum[3]; 388*3f1979aaSAndroid Build Coastguard Worker } 389*3f1979aaSAndroid Build Coastguard Worker return i/2; 390*3f1979aaSAndroid Build Coastguard Worker } 391*3f1979aaSAndroid Build Coastguard Worker else 392*3f1979aaSAndroid Build Coastguard Worker { 393*3f1979aaSAndroid Build Coastguard Worker if ( (Nr & 3) == 0 ) 394*3f1979aaSAndroid Build Coastguard Worker { 395*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 396*3f1979aaSAndroid Build Coastguard Worker { 397*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 398*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < Nr; j += 4 ) 399*3f1979aaSAndroid Build Coastguard Worker { 400*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 401*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 402*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 403*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 404*3f1979aaSAndroid Build Coastguard Worker } 405*3f1979aaSAndroid Build Coastguard Worker Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]); 406*3f1979aaSAndroid Build Coastguard Worker } 407*3f1979aaSAndroid Build Coastguard Worker return i; 408*3f1979aaSAndroid Build Coastguard Worker } 409*3f1979aaSAndroid Build Coastguard Worker else 410*3f1979aaSAndroid Build Coastguard Worker { 411*3f1979aaSAndroid Build Coastguard Worker const int M = Nr & (~3); 412*3f1979aaSAndroid Build Coastguard Worker /* printf("B: Nr = %d\n", Nr ); */ 413*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i <= lenNr; ++i ) 414*3f1979aaSAndroid Build Coastguard Worker { 415*3f1979aaSAndroid Build Coastguard Worker float tailSum = 0.0; 416*3f1979aaSAndroid Build Coastguard Worker sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; 417*3f1979aaSAndroid Build Coastguard Worker for (j = 0; j < M; j += 4 ) 418*3f1979aaSAndroid Build Coastguard Worker { 419*3f1979aaSAndroid Build Coastguard Worker sum[0] += X[i+j] * Hrev[j]; 420*3f1979aaSAndroid Build Coastguard Worker sum[1] += X[i+j+1] * Hrev[j+1]; 421*3f1979aaSAndroid Build Coastguard Worker sum[2] += X[i+j+2] * Hrev[j+2]; 422*3f1979aaSAndroid Build Coastguard Worker sum[3] += X[i+j+3] * Hrev[j+3]; 423*3f1979aaSAndroid Build Coastguard Worker } 424*3f1979aaSAndroid Build Coastguard Worker for ( ; j < Nr; ++j ) 425*3f1979aaSAndroid Build Coastguard Worker tailSum += X[i+j] * Hrev[j]; 426*3f1979aaSAndroid Build Coastguard Worker Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum; 427*3f1979aaSAndroid Build Coastguard Worker } 428*3f1979aaSAndroid Build Coastguard Worker return i; 429*3f1979aaSAndroid Build Coastguard Worker } 430*3f1979aaSAndroid Build Coastguard Worker } 431*3f1979aaSAndroid Build Coastguard Worker } 432*3f1979aaSAndroid Build Coastguard Worker 433*3f1979aaSAndroid Build Coastguard Worker } 434*3f1979aaSAndroid Build Coastguard Worker 435*3f1979aaSAndroid Build Coastguard Worker 436*3f1979aaSAndroid Build Coastguard Worker int fast_conv(void * setup, const float * X, int len, float *Y, const float *Yref, int applyFlush) 437*3f1979aaSAndroid Build Coastguard Worker { 438*3f1979aaSAndroid Build Coastguard Worker (void)Yref; 439*3f1979aaSAndroid Build Coastguard Worker return pffastconv_apply( (PFFASTCONV_Setup*)setup, X, len, Y, applyFlush ); 440*3f1979aaSAndroid Build Coastguard Worker } 441*3f1979aaSAndroid Build Coastguard Worker 442*3f1979aaSAndroid Build Coastguard Worker 443*3f1979aaSAndroid Build Coastguard Worker 444*3f1979aaSAndroid Build Coastguard Worker void printFirst( const float * V, const char * st, const int N, const int perLine ) 445*3f1979aaSAndroid Build Coastguard Worker { 446*3f1979aaSAndroid Build Coastguard Worker (void)V; (void)st; (void)N; (void)perLine; 447*3f1979aaSAndroid Build Coastguard Worker return; 448*3f1979aaSAndroid Build Coastguard Worker #if 0 449*3f1979aaSAndroid Build Coastguard Worker int i; 450*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < N; ++i ) 451*3f1979aaSAndroid Build Coastguard Worker { 452*3f1979aaSAndroid Build Coastguard Worker if ( (i % perLine) == 0 ) 453*3f1979aaSAndroid Build Coastguard Worker printf("\n%s[%d]", st, i); 454*3f1979aaSAndroid Build Coastguard Worker printf("\t%.1f", V[i]); 455*3f1979aaSAndroid Build Coastguard Worker } 456*3f1979aaSAndroid Build Coastguard Worker printf("\n"); 457*3f1979aaSAndroid Build Coastguard Worker #endif 458*3f1979aaSAndroid Build Coastguard Worker } 459*3f1979aaSAndroid Build Coastguard Worker 460*3f1979aaSAndroid Build Coastguard Worker 461*3f1979aaSAndroid Build Coastguard Worker 462*3f1979aaSAndroid Build Coastguard Worker #define NUMY 11 463*3f1979aaSAndroid Build Coastguard Worker 464*3f1979aaSAndroid Build Coastguard Worker 465*3f1979aaSAndroid Build Coastguard Worker int test(int FILTERLEN, int convFlags, const int testOutLen, int printDbg, int printSpeed) { 466*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, td, tdref; 467*3f1979aaSAndroid Build Coastguard Worker float *X, *H; 468*3f1979aaSAndroid Build Coastguard Worker float *Y[NUMY]; 469*3f1979aaSAndroid Build Coastguard Worker int64_t outN[NUMY]; 470*3f1979aaSAndroid Build Coastguard Worker /* 256 KFloats or 16 MFloats data */ 471*3f1979aaSAndroid Build Coastguard Worker #if 1 472*3f1979aaSAndroid Build Coastguard Worker const int len = testOutLen ? (1 << 18) : (1 << 24); 473*3f1979aaSAndroid Build Coastguard Worker #elif 0 474*3f1979aaSAndroid Build Coastguard Worker const int len = testOutLen ? (1 << 18) : (1 << 13); 475*3f1979aaSAndroid Build Coastguard Worker #else 476*3f1979aaSAndroid Build Coastguard Worker const int len = testOutLen ? (1 << 18) : (1024); 477*3f1979aaSAndroid Build Coastguard Worker #endif 478*3f1979aaSAndroid Build Coastguard Worker const int cplxFactor = ( convFlags & PFFASTCONV_CPLX_INP_OUT ) ? 2 : 1; 479*3f1979aaSAndroid Build Coastguard Worker const int lenC = len / cplxFactor; 480*3f1979aaSAndroid Build Coastguard Worker 481*3f1979aaSAndroid Build Coastguard Worker int yi, yc, posMaxErr; 482*3f1979aaSAndroid Build Coastguard Worker float yRangeMin, yRangeMax, yErrLimit, maxErr = 0.0; 483*3f1979aaSAndroid Build Coastguard Worker int i, j, numErrOverLimit, iter; 484*3f1979aaSAndroid Build Coastguard Worker int retErr = 0; 485*3f1979aaSAndroid Build Coastguard Worker 486*3f1979aaSAndroid Build Coastguard Worker /* 0 1 2 3 4 5 6 7 8 9 */ 487*3f1979aaSAndroid Build Coastguard Worker pfnConvSetup aSetup[NUMY] = { convSetupRev, convSetupRev, convSetupRev, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup }; 488*3f1979aaSAndroid Build Coastguard Worker pfnConvDestroy aDestroy[NUMY] = { convDestroyRev, convDestroyRev, convDestroyRev, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy }; 489*3f1979aaSAndroid Build Coastguard Worker pfnGetConvFnPtr aGetFnPtr[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; 490*3f1979aaSAndroid Build Coastguard Worker pfnConvolution aConv[NUMY] = { slow_conv_R, slow_conv_A, slow_conv_B, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv }; 491*3f1979aaSAndroid Build Coastguard Worker const char * convText[NUMY] = { "R(non-simd)", "A(non-simd)", "B(non-simd)", "fast_conv_64", "fast_conv_128", "fast_conv_256", "fast_conv_512", "fast_conv_1K", "fast_conv_2K", "fast_conv_4K" }; 492*3f1979aaSAndroid Build Coastguard Worker int aFastAlgo[NUMY] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }; 493*3f1979aaSAndroid Build Coastguard Worker void * aSetupCfg[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; 494*3f1979aaSAndroid Build Coastguard Worker int aBlkLen[NUMY] = { 1024, 1024, 1024, 64, 128, 256, 512, 1024, 2048, 4096 }; 495*3f1979aaSAndroid Build Coastguard Worker #if 1 496*3f1979aaSAndroid Build Coastguard Worker int aRunAlgo[NUMY] = { 1, 1, 1, FILTERLEN<64, FILTERLEN<128, FILTERLEN<256, FILTERLEN<512, FILTERLEN<1024, FILTERLEN<2048, FILTERLEN<4096 }; 497*3f1979aaSAndroid Build Coastguard Worker #elif 0 498*3f1979aaSAndroid Build Coastguard Worker int aRunAlgo[NUMY] = { 1, 0, 0, 0 && FILTERLEN<64, 1 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 }; 499*3f1979aaSAndroid Build Coastguard Worker #else 500*3f1979aaSAndroid Build Coastguard Worker int aRunAlgo[NUMY] = { 1, 1, 1, 0 && FILTERLEN<64, 0 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 }; 501*3f1979aaSAndroid Build Coastguard Worker #endif 502*3f1979aaSAndroid Build Coastguard Worker double aSpeedFactor[NUMY], aDuration[NUMY], procSmpPerSec[NUMY]; 503*3f1979aaSAndroid Build Coastguard Worker 504*3f1979aaSAndroid Build Coastguard Worker X = pffastconv_malloc( (unsigned)(len+4) * sizeof(float) ); 505*3f1979aaSAndroid Build Coastguard Worker for ( i=0; i < NUMY; ++i) 506*3f1979aaSAndroid Build Coastguard Worker { 507*3f1979aaSAndroid Build Coastguard Worker if ( 1 || i < 2 ) 508*3f1979aaSAndroid Build Coastguard Worker Y[i] = pffastconv_malloc( (unsigned)len * sizeof(float) ); 509*3f1979aaSAndroid Build Coastguard Worker else 510*3f1979aaSAndroid Build Coastguard Worker Y[i] = Y[1]; 511*3f1979aaSAndroid Build Coastguard Worker 512*3f1979aaSAndroid Build Coastguard Worker Y[i][0] = 123.F; /* test for pffft_zconvolve_no_accu() */ 513*3f1979aaSAndroid Build Coastguard Worker aSpeedFactor[i] = -1.0; 514*3f1979aaSAndroid Build Coastguard Worker aDuration[i] = -1.0; 515*3f1979aaSAndroid Build Coastguard Worker procSmpPerSec[i] = -1.0; 516*3f1979aaSAndroid Build Coastguard Worker } 517*3f1979aaSAndroid Build Coastguard Worker 518*3f1979aaSAndroid Build Coastguard Worker H = pffastconv_malloc((unsigned)FILTERLEN * sizeof(float)); 519*3f1979aaSAndroid Build Coastguard Worker 520*3f1979aaSAndroid Build Coastguard Worker /* initialize input */ 521*3f1979aaSAndroid Build Coastguard Worker if ( convFlags & PFFASTCONV_CPLX_INP_OUT ) 522*3f1979aaSAndroid Build Coastguard Worker { 523*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < lenC; ++i ) 524*3f1979aaSAndroid Build Coastguard Worker { 525*3f1979aaSAndroid Build Coastguard Worker X[2*i ] = (float)(i % 4093); /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */ 526*3f1979aaSAndroid Build Coastguard Worker X[2*i+1] = (float)((i+2048) % 4093); 527*3f1979aaSAndroid Build Coastguard Worker } 528*3f1979aaSAndroid Build Coastguard Worker } 529*3f1979aaSAndroid Build Coastguard Worker else 530*3f1979aaSAndroid Build Coastguard Worker { 531*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < len; ++i ) 532*3f1979aaSAndroid Build Coastguard Worker X[i] = (float)(i % 4093); /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */ 533*3f1979aaSAndroid Build Coastguard Worker } 534*3f1979aaSAndroid Build Coastguard Worker X[ len ] = INVALID_FLOAT_VAL; 535*3f1979aaSAndroid Build Coastguard Worker X[ len +1 ] = INVALID_FLOAT_VAL; 536*3f1979aaSAndroid Build Coastguard Worker X[ len +2 ] = INVALID_FLOAT_VAL; 537*3f1979aaSAndroid Build Coastguard Worker X[ len +3 ] = INVALID_FLOAT_VAL; 538*3f1979aaSAndroid Build Coastguard Worker 539*3f1979aaSAndroid Build Coastguard Worker if (!testOutLen) 540*3f1979aaSAndroid Build Coastguard Worker printFirst( X, "X", 64, 8 ); 541*3f1979aaSAndroid Build Coastguard Worker 542*3f1979aaSAndroid Build Coastguard Worker /* filter coeffs */ 543*3f1979aaSAndroid Build Coastguard Worker memset( H, 0, FILTERLEN * sizeof(float) ); 544*3f1979aaSAndroid Build Coastguard Worker #if 1 545*3f1979aaSAndroid Build Coastguard Worker if ( convFlags & PFFASTCONV_SYMMETRIC ) 546*3f1979aaSAndroid Build Coastguard Worker { 547*3f1979aaSAndroid Build Coastguard Worker const int half = FILTERLEN / 2; 548*3f1979aaSAndroid Build Coastguard Worker for ( j = 0; j < half; ++j ) { 549*3f1979aaSAndroid Build Coastguard Worker switch (j % 3) { 550*3f1979aaSAndroid Build Coastguard Worker case 0: H[j] = H[FILTERLEN-1-j] = -1.0F; break; 551*3f1979aaSAndroid Build Coastguard Worker case 1: H[j] = H[FILTERLEN-1-j] = 1.0F; break; 552*3f1979aaSAndroid Build Coastguard Worker case 2: H[j] = H[FILTERLEN-1-j] = 0.5F; break; 553*3f1979aaSAndroid Build Coastguard Worker } 554*3f1979aaSAndroid Build Coastguard Worker } 555*3f1979aaSAndroid Build Coastguard Worker } 556*3f1979aaSAndroid Build Coastguard Worker else 557*3f1979aaSAndroid Build Coastguard Worker { 558*3f1979aaSAndroid Build Coastguard Worker for ( j = 0; j < FILTERLEN; ++j ) { 559*3f1979aaSAndroid Build Coastguard Worker switch (j % 3) { 560*3f1979aaSAndroid Build Coastguard Worker case 0: H[j] = -1.0F; break; 561*3f1979aaSAndroid Build Coastguard Worker case 1: H[j] = 1.0F; break; 562*3f1979aaSAndroid Build Coastguard Worker case 2: H[j] = 0.5F; break; 563*3f1979aaSAndroid Build Coastguard Worker } 564*3f1979aaSAndroid Build Coastguard Worker } 565*3f1979aaSAndroid Build Coastguard Worker } 566*3f1979aaSAndroid Build Coastguard Worker #else 567*3f1979aaSAndroid Build Coastguard Worker H[0] = 1.0F; 568*3f1979aaSAndroid Build Coastguard Worker H[FILTERLEN -1] = 1.0F; 569*3f1979aaSAndroid Build Coastguard Worker #endif 570*3f1979aaSAndroid Build Coastguard Worker if (!testOutLen) 571*3f1979aaSAndroid Build Coastguard Worker printFirst( H, "H", FILTERLEN, 8 ); 572*3f1979aaSAndroid Build Coastguard Worker 573*3f1979aaSAndroid Build Coastguard Worker printf("\n"); 574*3f1979aaSAndroid Build Coastguard Worker printf("filterLen = %d\t%s%s\t%s:\n", FILTERLEN, 575*3f1979aaSAndroid Build Coastguard Worker ((convFlags & PFFASTCONV_CPLX_INP_OUT)?"cplx":"real"), 576*3f1979aaSAndroid Build Coastguard Worker (convFlags & PFFASTCONV_CPLX_INP_OUT)?((convFlags & PFFASTCONV_CPLX_SINGLE_FFT)?" single":" 2x") : "", 577*3f1979aaSAndroid Build Coastguard Worker ((convFlags & PFFASTCONV_SYMMETRIC)?"symmetric":"non-sym") ); 578*3f1979aaSAndroid Build Coastguard Worker 579*3f1979aaSAndroid Build Coastguard Worker while (1) 580*3f1979aaSAndroid Build Coastguard Worker { 581*3f1979aaSAndroid Build Coastguard Worker 582*3f1979aaSAndroid Build Coastguard Worker for ( yi = 0; yi < NUMY; ++yi ) 583*3f1979aaSAndroid Build Coastguard Worker { 584*3f1979aaSAndroid Build Coastguard Worker if (!aRunAlgo[yi]) 585*3f1979aaSAndroid Build Coastguard Worker continue; 586*3f1979aaSAndroid Build Coastguard Worker 587*3f1979aaSAndroid Build Coastguard Worker aSetupCfg[yi] = aSetup[yi]( H, FILTERLEN, &aBlkLen[yi], convFlags ); 588*3f1979aaSAndroid Build Coastguard Worker 589*3f1979aaSAndroid Build Coastguard Worker /* get effective apply function ptr */ 590*3f1979aaSAndroid Build Coastguard Worker if ( aSetupCfg[yi] && aGetFnPtr[yi] ) 591*3f1979aaSAndroid Build Coastguard Worker aConv[yi] = aGetFnPtr[yi]( aSetupCfg[yi] ); 592*3f1979aaSAndroid Build Coastguard Worker 593*3f1979aaSAndroid Build Coastguard Worker if ( aSetupCfg[yi] && aConv[yi] ) { 594*3f1979aaSAndroid Build Coastguard Worker if (testOutLen) 595*3f1979aaSAndroid Build Coastguard Worker { 596*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(); 597*3f1979aaSAndroid Build Coastguard Worker outN[yi] = aConv[yi]( aSetupCfg[yi], X, lenC, Y[yi], Y[0], 1 /* applyFlush */ ); 598*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(); 599*3f1979aaSAndroid Build Coastguard Worker td = t1 - t0; 600*3f1979aaSAndroid Build Coastguard Worker } 601*3f1979aaSAndroid Build Coastguard Worker else 602*3f1979aaSAndroid Build Coastguard Worker { 603*3f1979aaSAndroid Build Coastguard Worker const int blkLen = 4096; /* required for 'fast_conv_4K' */ 604*3f1979aaSAndroid Build Coastguard Worker int64_t offC = 0, offS, Nout; 605*3f1979aaSAndroid Build Coastguard Worker int k; 606*3f1979aaSAndroid Build Coastguard Worker iter = 0; 607*3f1979aaSAndroid Build Coastguard Worker outN[yi] = 0; 608*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(); 609*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.25; /* benchmark duration: 250 ms */ 610*3f1979aaSAndroid Build Coastguard Worker do { 611*3f1979aaSAndroid Build Coastguard Worker for ( k = 0; k < 128 && offC +blkLen < lenC; ++k ) 612*3f1979aaSAndroid Build Coastguard Worker { 613*3f1979aaSAndroid Build Coastguard Worker offS = cplxFactor * offC; 614*3f1979aaSAndroid Build Coastguard Worker Nout = aConv[yi]( aSetupCfg[yi], X +offS, blkLen, Y[yi] +offS, Y[0], (offC +blkLen >= lenC) /* applyFlush */ ); 615*3f1979aaSAndroid Build Coastguard Worker offC += Nout; 616*3f1979aaSAndroid Build Coastguard Worker ++iter; 617*3f1979aaSAndroid Build Coastguard Worker if ( !Nout ) 618*3f1979aaSAndroid Build Coastguard Worker break; 619*3f1979aaSAndroid Build Coastguard Worker if ( offC +blkLen >= lenC ) 620*3f1979aaSAndroid Build Coastguard Worker { 621*3f1979aaSAndroid Build Coastguard Worker outN[yi] += offC; 622*3f1979aaSAndroid Build Coastguard Worker offC = 0; 623*3f1979aaSAndroid Build Coastguard Worker } 624*3f1979aaSAndroid Build Coastguard Worker } 625*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(); 626*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop ); 627*3f1979aaSAndroid Build Coastguard Worker outN[yi] += offC; 628*3f1979aaSAndroid Build Coastguard Worker td = t1 - t0; 629*3f1979aaSAndroid Build Coastguard Worker procSmpPerSec[yi] = cplxFactor * (double)outN[yi] / td; 630*3f1979aaSAndroid Build Coastguard Worker } 631*3f1979aaSAndroid Build Coastguard Worker } 632*3f1979aaSAndroid Build Coastguard Worker else 633*3f1979aaSAndroid Build Coastguard Worker { 634*3f1979aaSAndroid Build Coastguard Worker t0 = t1 = td = 0.0; 635*3f1979aaSAndroid Build Coastguard Worker outN[yi] = 0; 636*3f1979aaSAndroid Build Coastguard Worker } 637*3f1979aaSAndroid Build Coastguard Worker aDuration[yi] = td; 638*3f1979aaSAndroid Build Coastguard Worker if ( yi == 0 ) { 639*3f1979aaSAndroid Build Coastguard Worker const float * Yvals = Y[0]; 640*3f1979aaSAndroid Build Coastguard Worker const int64_t refOutLen = cplxFactor * outN[0]; 641*3f1979aaSAndroid Build Coastguard Worker tdref = td; 642*3f1979aaSAndroid Build Coastguard Worker if (printDbg) { 643*3f1979aaSAndroid Build Coastguard Worker printf("convolution '%s' took: %f ms\n", convText[yi], td*1000.0); 644*3f1979aaSAndroid Build Coastguard Worker printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor); 645*3f1979aaSAndroid Build Coastguard Worker } 646*3f1979aaSAndroid Build Coastguard Worker aSpeedFactor[yi] = 1.0; 647*3f1979aaSAndroid Build Coastguard Worker /* */ 648*3f1979aaSAndroid Build Coastguard Worker yRangeMin = FLT_MAX; 649*3f1979aaSAndroid Build Coastguard Worker yRangeMax = FLT_MIN; 650*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < refOutLen; ++i ) 651*3f1979aaSAndroid Build Coastguard Worker { 652*3f1979aaSAndroid Build Coastguard Worker if ( yRangeMax < Yvals[i] ) yRangeMax = Yvals[i]; 653*3f1979aaSAndroid Build Coastguard Worker if ( yRangeMin > Yvals[i] ) yRangeMin = Yvals[i]; 654*3f1979aaSAndroid Build Coastguard Worker } 655*3f1979aaSAndroid Build Coastguard Worker yErrLimit = fabsf(yRangeMax - yRangeMin) / ( 100.0F * 1000.0F ); 656*3f1979aaSAndroid Build Coastguard Worker /* yErrLimit = 0.01F; */ 657*3f1979aaSAndroid Build Coastguard Worker if (testOutLen) { 658*3f1979aaSAndroid Build Coastguard Worker if (1) { 659*3f1979aaSAndroid Build Coastguard Worker printf("reference output len = %" PRId64 " smp\n", outN[0]); 660*3f1979aaSAndroid Build Coastguard Worker printf("reference output range |%.1f ..%.1f| = %.1f ==> err limit = %f\n", yRangeMin, yRangeMax, yRangeMax - yRangeMin, yErrLimit); 661*3f1979aaSAndroid Build Coastguard Worker } 662*3f1979aaSAndroid Build Coastguard Worker printFirst( Yvals, "Yref", 64, 8 ); 663*3f1979aaSAndroid Build Coastguard Worker } 664*3f1979aaSAndroid Build Coastguard Worker } 665*3f1979aaSAndroid Build Coastguard Worker else 666*3f1979aaSAndroid Build Coastguard Worker { 667*3f1979aaSAndroid Build Coastguard Worker aSpeedFactor[yi] = tdref / td; 668*3f1979aaSAndroid Build Coastguard Worker if (printDbg) { 669*3f1979aaSAndroid Build Coastguard Worker printf("\nconvolution '%s' took: %f ms == %f %% == %f X\n", convText[yi], td*1000.0, td * 100 / tdref, tdref / td); 670*3f1979aaSAndroid Build Coastguard Worker printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor); 671*3f1979aaSAndroid Build Coastguard Worker } 672*3f1979aaSAndroid Build Coastguard Worker } 673*3f1979aaSAndroid Build Coastguard Worker } 674*3f1979aaSAndroid Build Coastguard Worker 675*3f1979aaSAndroid Build Coastguard Worker int iMaxSpeedSlowAlgo = -1; 676*3f1979aaSAndroid Build Coastguard Worker int iFirstFastAlgo = -1; 677*3f1979aaSAndroid Build Coastguard Worker int iMaxSpeedFastAlgo = -1; 678*3f1979aaSAndroid Build Coastguard Worker int iPrintedRefOutLen = 0; 679*3f1979aaSAndroid Build Coastguard Worker { 680*3f1979aaSAndroid Build Coastguard Worker for ( yc = 1; yc < NUMY; ++yc ) 681*3f1979aaSAndroid Build Coastguard Worker { 682*3f1979aaSAndroid Build Coastguard Worker if (!aRunAlgo[yc]) 683*3f1979aaSAndroid Build Coastguard Worker continue; 684*3f1979aaSAndroid Build Coastguard Worker if (aFastAlgo[yc]) { 685*3f1979aaSAndroid Build Coastguard Worker if ( iMaxSpeedFastAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedFastAlgo] ) 686*3f1979aaSAndroid Build Coastguard Worker iMaxSpeedFastAlgo = yc; 687*3f1979aaSAndroid Build Coastguard Worker 688*3f1979aaSAndroid Build Coastguard Worker if (iFirstFastAlgo < 0) 689*3f1979aaSAndroid Build Coastguard Worker iFirstFastAlgo = yc; 690*3f1979aaSAndroid Build Coastguard Worker } 691*3f1979aaSAndroid Build Coastguard Worker else 692*3f1979aaSAndroid Build Coastguard Worker { 693*3f1979aaSAndroid Build Coastguard Worker if ( iMaxSpeedSlowAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedSlowAlgo] ) 694*3f1979aaSAndroid Build Coastguard Worker iMaxSpeedSlowAlgo = yc; 695*3f1979aaSAndroid Build Coastguard Worker } 696*3f1979aaSAndroid Build Coastguard Worker } 697*3f1979aaSAndroid Build Coastguard Worker 698*3f1979aaSAndroid Build Coastguard Worker if (printSpeed) 699*3f1979aaSAndroid Build Coastguard Worker { 700*3f1979aaSAndroid Build Coastguard Worker if (testOutLen) 701*3f1979aaSAndroid Build Coastguard Worker { 702*3f1979aaSAndroid Build Coastguard Worker if (iMaxSpeedSlowAlgo >= 0 ) 703*3f1979aaSAndroid Build Coastguard Worker printf("fastest slow algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedSlowAlgo], aSpeedFactor[iMaxSpeedSlowAlgo], 1000.0 * aDuration[iMaxSpeedSlowAlgo]); 704*3f1979aaSAndroid Build Coastguard Worker if (0 != iMaxSpeedSlowAlgo && aRunAlgo[0]) 705*3f1979aaSAndroid Build Coastguard Worker printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[0], aSpeedFactor[0], 1000.0 * aDuration[0]); 706*3f1979aaSAndroid Build Coastguard Worker if (1 != iMaxSpeedSlowAlgo && aRunAlgo[1]) 707*3f1979aaSAndroid Build Coastguard Worker printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[1], aSpeedFactor[1], 1000.0 * aDuration[1]); 708*3f1979aaSAndroid Build Coastguard Worker 709*3f1979aaSAndroid Build Coastguard Worker if (iFirstFastAlgo >= 0 && iFirstFastAlgo != iMaxSpeedFastAlgo && aRunAlgo[iFirstFastAlgo]) 710*3f1979aaSAndroid Build Coastguard Worker printf("first fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo], aSpeedFactor[iFirstFastAlgo], 1000.0 * aDuration[iFirstFastAlgo]); 711*3f1979aaSAndroid Build Coastguard Worker if (iFirstFastAlgo >= 0 && iFirstFastAlgo+1 != iMaxSpeedFastAlgo && iFirstFastAlgo+1 < NUMY && aRunAlgo[iFirstFastAlgo+1]) 712*3f1979aaSAndroid Build Coastguard Worker printf("2nd fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo+1], aSpeedFactor[iFirstFastAlgo+1], 1000.0 * aDuration[iFirstFastAlgo+1]); 713*3f1979aaSAndroid Build Coastguard Worker 714*3f1979aaSAndroid Build Coastguard Worker if ( 0 <= iMaxSpeedFastAlgo && iMaxSpeedFastAlgo < NUMY && aRunAlgo[iMaxSpeedFastAlgo] ) 715*3f1979aaSAndroid Build Coastguard Worker { 716*3f1979aaSAndroid Build Coastguard Worker printf("fastest fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedFastAlgo], aSpeedFactor[iMaxSpeedFastAlgo], 1000.0 * aDuration[iMaxSpeedFastAlgo]); 717*3f1979aaSAndroid Build Coastguard Worker if ( 0 <= iMaxSpeedSlowAlgo && iMaxSpeedSlowAlgo < NUMY && aRunAlgo[iMaxSpeedSlowAlgo] ) 718*3f1979aaSAndroid Build Coastguard Worker printf("fast / slow ratio: %f X\n", aSpeedFactor[iMaxSpeedFastAlgo] / aSpeedFactor[iMaxSpeedSlowAlgo] ); 719*3f1979aaSAndroid Build Coastguard Worker } 720*3f1979aaSAndroid Build Coastguard Worker printf("\n"); 721*3f1979aaSAndroid Build Coastguard Worker } 722*3f1979aaSAndroid Build Coastguard Worker else 723*3f1979aaSAndroid Build Coastguard Worker { 724*3f1979aaSAndroid Build Coastguard Worker for ( yc = 0; yc < NUMY; ++yc ) 725*3f1979aaSAndroid Build Coastguard Worker { 726*3f1979aaSAndroid Build Coastguard Worker if (!aRunAlgo[yc] || procSmpPerSec[yc] <= 0.0) 727*3f1979aaSAndroid Build Coastguard Worker continue; 728*3f1979aaSAndroid Build Coastguard Worker printf("algo '%s':\t%.2f MSmp\tin\t%.1f ms\t= %g kSmpPerSec\n", 729*3f1979aaSAndroid Build Coastguard Worker convText[yc], (double)outN[yc]/(1000.0 * 1000.0), 1000.0 * aDuration[yc], procSmpPerSec[yc] * 0.001 ); 730*3f1979aaSAndroid Build Coastguard Worker } 731*3f1979aaSAndroid Build Coastguard Worker } 732*3f1979aaSAndroid Build Coastguard Worker 733*3f1979aaSAndroid Build Coastguard Worker } 734*3f1979aaSAndroid Build Coastguard Worker } 735*3f1979aaSAndroid Build Coastguard Worker 736*3f1979aaSAndroid Build Coastguard Worker 737*3f1979aaSAndroid Build Coastguard Worker for ( yc = 1; yc < NUMY; ++yc ) 738*3f1979aaSAndroid Build Coastguard Worker { 739*3f1979aaSAndroid Build Coastguard Worker const float * Yref; 740*3f1979aaSAndroid Build Coastguard Worker const float * Ycurr; 741*3f1979aaSAndroid Build Coastguard Worker int outMin; 742*3f1979aaSAndroid Build Coastguard Worker 743*3f1979aaSAndroid Build Coastguard Worker if (!aRunAlgo[yc]) 744*3f1979aaSAndroid Build Coastguard Worker continue; 745*3f1979aaSAndroid Build Coastguard Worker 746*3f1979aaSAndroid Build Coastguard Worker if (printDbg) 747*3f1979aaSAndroid Build Coastguard Worker printf("\n"); 748*3f1979aaSAndroid Build Coastguard Worker 749*3f1979aaSAndroid Build Coastguard Worker if ( outN[yc] == 0 ) 750*3f1979aaSAndroid Build Coastguard Worker { 751*3f1979aaSAndroid Build Coastguard Worker printf("output size 0: '%s' not implemented\n", convText[yc]); 752*3f1979aaSAndroid Build Coastguard Worker } 753*3f1979aaSAndroid Build Coastguard Worker else if ( outN[0] != outN[yc] /* && aFastAlgo[yc] */ && testOutLen ) 754*3f1979aaSAndroid Build Coastguard Worker { 755*3f1979aaSAndroid Build Coastguard Worker if (!iPrintedRefOutLen) 756*3f1979aaSAndroid Build Coastguard Worker { 757*3f1979aaSAndroid Build Coastguard Worker printf("reference output size = %" PRId64 ", delta to (cplx) input length = %" PRId64 " smp\n", outN[0], (len / cplxFactor) - outN[0]); 758*3f1979aaSAndroid Build Coastguard Worker iPrintedRefOutLen = 1; 759*3f1979aaSAndroid Build Coastguard Worker } 760*3f1979aaSAndroid Build Coastguard Worker printf("output size doesn't match!: ref (FILTERLEN %d) returned %" PRId64 " smp, '%s' returned %" PRId64 " smp : delta = %" PRId64 " smp\n", 761*3f1979aaSAndroid Build Coastguard Worker FILTERLEN, outN[0], convText[yc], outN[yc], outN[yc] - outN[0] ); 762*3f1979aaSAndroid Build Coastguard Worker retErr = 1; 763*3f1979aaSAndroid Build Coastguard Worker } 764*3f1979aaSAndroid Build Coastguard Worker 765*3f1979aaSAndroid Build Coastguard Worker posMaxErr = 0; 766*3f1979aaSAndroid Build Coastguard Worker maxErr = -1.0; 767*3f1979aaSAndroid Build Coastguard Worker Yref = Y[0]; 768*3f1979aaSAndroid Build Coastguard Worker Ycurr = Y[yc]; 769*3f1979aaSAndroid Build Coastguard Worker outMin = ( outN[yc] < outN[0] ) ? outN[yc] : outN[0]; 770*3f1979aaSAndroid Build Coastguard Worker numErrOverLimit = 0; 771*3f1979aaSAndroid Build Coastguard Worker for ( i = 0; i < outMin; ++i ) 772*3f1979aaSAndroid Build Coastguard Worker { 773*3f1979aaSAndroid Build Coastguard Worker if ( numErrOverLimit < 6 && fabs(Ycurr[i] - Yref[i]) >= yErrLimit ) 774*3f1979aaSAndroid Build Coastguard Worker { 775*3f1979aaSAndroid Build Coastguard Worker printf("algo '%s': at %d: ***ERROR*** = %f, errLimit = %f, ref = %f, actual = %f\n", 776*3f1979aaSAndroid Build Coastguard Worker convText[yc], i, fabs(Ycurr[i] - Yref[i]), yErrLimit, Yref[i], Ycurr[i] ); 777*3f1979aaSAndroid Build Coastguard Worker ++numErrOverLimit; 778*3f1979aaSAndroid Build Coastguard Worker } 779*3f1979aaSAndroid Build Coastguard Worker 780*3f1979aaSAndroid Build Coastguard Worker if ( fabs(Ycurr[i] - Yref[i]) > maxErr ) 781*3f1979aaSAndroid Build Coastguard Worker { 782*3f1979aaSAndroid Build Coastguard Worker maxErr = fabsf(Ycurr[i] - Yref[i]); 783*3f1979aaSAndroid Build Coastguard Worker posMaxErr = i; 784*3f1979aaSAndroid Build Coastguard Worker } 785*3f1979aaSAndroid Build Coastguard Worker } 786*3f1979aaSAndroid Build Coastguard Worker 787*3f1979aaSAndroid Build Coastguard Worker if ( printDbg || (iMaxSpeedSlowAlgo == i) || (iMaxSpeedFastAlgo == i) ) 788*3f1979aaSAndroid Build Coastguard Worker printf("max difference for '%s' is %g at sample idx %d of max inp 4093-1 == %f %%\n", convText[yc], maxErr, posMaxErr, maxErr * 100.0 / 4092.0 ); 789*3f1979aaSAndroid Build Coastguard Worker } 790*3f1979aaSAndroid Build Coastguard Worker 791*3f1979aaSAndroid Build Coastguard Worker break; 792*3f1979aaSAndroid Build Coastguard Worker } 793*3f1979aaSAndroid Build Coastguard Worker 794*3f1979aaSAndroid Build Coastguard Worker pffastconv_free(X); 795*3f1979aaSAndroid Build Coastguard Worker for ( i=0; i < NUMY; ++i) 796*3f1979aaSAndroid Build Coastguard Worker { 797*3f1979aaSAndroid Build Coastguard Worker if ( 1 || i < 2 ) 798*3f1979aaSAndroid Build Coastguard Worker pffastconv_free( Y[i] ); 799*3f1979aaSAndroid Build Coastguard Worker if (!aRunAlgo[i]) 800*3f1979aaSAndroid Build Coastguard Worker continue; 801*3f1979aaSAndroid Build Coastguard Worker aDestroy[i]( aSetupCfg[i] ); 802*3f1979aaSAndroid Build Coastguard Worker } 803*3f1979aaSAndroid Build Coastguard Worker 804*3f1979aaSAndroid Build Coastguard Worker pffastconv_free(H); 805*3f1979aaSAndroid Build Coastguard Worker 806*3f1979aaSAndroid Build Coastguard Worker return retErr; 807*3f1979aaSAndroid Build Coastguard Worker } 808*3f1979aaSAndroid Build Coastguard Worker 809*3f1979aaSAndroid Build Coastguard Worker /* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions */ 810*3f1979aaSAndroid Build Coastguard Worker void validate_pffft_simd(); 811*3f1979aaSAndroid Build Coastguard Worker int validate_pffft_simd_ex(FILE * DbgOut); 812*3f1979aaSAndroid Build Coastguard Worker 813*3f1979aaSAndroid Build Coastguard Worker 814*3f1979aaSAndroid Build Coastguard Worker int main(int argc, char **argv) 815*3f1979aaSAndroid Build Coastguard Worker { 816*3f1979aaSAndroid Build Coastguard Worker int result = 0; 817*3f1979aaSAndroid Build Coastguard Worker int i, k, M, flagsA, flagsB, flagsC, testOutLen, printDbg, printSpeed; 818*3f1979aaSAndroid Build Coastguard Worker int testOutLens = 1, benchConv = 1, quickTest = 0, slowTest = 0; 819*3f1979aaSAndroid Build Coastguard Worker int testReal = 1, testCplx = 1, testSymetric = 0; 820*3f1979aaSAndroid Build Coastguard Worker 821*3f1979aaSAndroid Build Coastguard Worker for ( i = 1; i < argc; ++i ) { 822*3f1979aaSAndroid Build Coastguard Worker 823*3f1979aaSAndroid Build Coastguard Worker if (!strcmp(argv[i], "--test-simd")) { 824*3f1979aaSAndroid Build Coastguard Worker int numErrs = validate_pffft_simd_ex(stdout); 825*3f1979aaSAndroid Build Coastguard Worker fprintf( ( numErrs != 0 ? stderr : stdout ), "validate_pffft_simd_ex() returned %d errors!\n", numErrs); 826*3f1979aaSAndroid Build Coastguard Worker return ( numErrs > 0 ? 1 : 0 ); 827*3f1979aaSAndroid Build Coastguard Worker } 828*3f1979aaSAndroid Build Coastguard Worker 829*3f1979aaSAndroid Build Coastguard Worker if (!strcmp(argv[i], "--no-len")) { 830*3f1979aaSAndroid Build Coastguard Worker testOutLens = 0; 831*3f1979aaSAndroid Build Coastguard Worker } 832*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--no-bench")) { 833*3f1979aaSAndroid Build Coastguard Worker benchConv = 0; 834*3f1979aaSAndroid Build Coastguard Worker } 835*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--quick")) { 836*3f1979aaSAndroid Build Coastguard Worker quickTest = 1; 837*3f1979aaSAndroid Build Coastguard Worker } 838*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--slow")) { 839*3f1979aaSAndroid Build Coastguard Worker slowTest = 1; 840*3f1979aaSAndroid Build Coastguard Worker } 841*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--real")) { 842*3f1979aaSAndroid Build Coastguard Worker testCplx = 0; 843*3f1979aaSAndroid Build Coastguard Worker } 844*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--cplx")) { 845*3f1979aaSAndroid Build Coastguard Worker testReal = 0; 846*3f1979aaSAndroid Build Coastguard Worker } 847*3f1979aaSAndroid Build Coastguard Worker else if (!strcmp(argv[i], "--sym")) { 848*3f1979aaSAndroid Build Coastguard Worker testSymetric = 1; 849*3f1979aaSAndroid Build Coastguard Worker } 850*3f1979aaSAndroid Build Coastguard Worker else /* if (!strcmp(argv[i], "--help")) */ { 851*3f1979aaSAndroid Build Coastguard Worker printf("usage: %s [--test-simd] [--no-len] [--no-bench] [--quick|--slow] [--real|--cplx] [--sym]\n", argv[0]); 852*3f1979aaSAndroid Build Coastguard Worker exit(1); 853*3f1979aaSAndroid Build Coastguard Worker } 854*3f1979aaSAndroid Build Coastguard Worker } 855*3f1979aaSAndroid Build Coastguard Worker 856*3f1979aaSAndroid Build Coastguard Worker 857*3f1979aaSAndroid Build Coastguard Worker if (testOutLens) 858*3f1979aaSAndroid Build Coastguard Worker { 859*3f1979aaSAndroid Build Coastguard Worker for ( k = 0; k < 3; ++k ) 860*3f1979aaSAndroid Build Coastguard Worker { 861*3f1979aaSAndroid Build Coastguard Worker if ( (k == 0 && !testReal) || (k > 0 && !testCplx) ) 862*3f1979aaSAndroid Build Coastguard Worker continue; 863*3f1979aaSAndroid Build Coastguard Worker printf("\n\n==========\n"); 864*3f1979aaSAndroid Build Coastguard Worker printf("testing %s %s output lengths ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) ); 865*3f1979aaSAndroid Build Coastguard Worker printf("==========\n"); 866*3f1979aaSAndroid Build Coastguard Worker flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT; 867*3f1979aaSAndroid Build Coastguard Worker flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 ); 868*3f1979aaSAndroid Build Coastguard Worker flagsC = flagsB | PFFASTCONV_CPLX_SINGLE_FFT; 869*3f1979aaSAndroid Build Coastguard Worker testOutLen = 1; 870*3f1979aaSAndroid Build Coastguard Worker printDbg = 0; 871*3f1979aaSAndroid Build Coastguard Worker printSpeed = 0; 872*3f1979aaSAndroid Build Coastguard Worker for ( M = 128 - 4; M <= (quickTest ? 128+16 : 256); ++M ) 873*3f1979aaSAndroid Build Coastguard Worker { 874*3f1979aaSAndroid Build Coastguard Worker if ( (M % 16) != 0 && testSymetric ) 875*3f1979aaSAndroid Build Coastguard Worker continue; 876*3f1979aaSAndroid Build Coastguard Worker result |= test(M, flagsB, testOutLen, printDbg, printSpeed); 877*3f1979aaSAndroid Build Coastguard Worker } 878*3f1979aaSAndroid Build Coastguard Worker } 879*3f1979aaSAndroid Build Coastguard Worker } 880*3f1979aaSAndroid Build Coastguard Worker 881*3f1979aaSAndroid Build Coastguard Worker if (benchConv) 882*3f1979aaSAndroid Build Coastguard Worker { 883*3f1979aaSAndroid Build Coastguard Worker for ( k = 0; k < 3; ++k ) 884*3f1979aaSAndroid Build Coastguard Worker { 885*3f1979aaSAndroid Build Coastguard Worker if ( (k == 0 && !testReal) || (k > 0 && !testCplx) ) 886*3f1979aaSAndroid Build Coastguard Worker continue; 887*3f1979aaSAndroid Build Coastguard Worker printf("\n\n==========\n"); 888*3f1979aaSAndroid Build Coastguard Worker printf("starting %s %s benchmark against linear convolutions ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) ); 889*3f1979aaSAndroid Build Coastguard Worker printf("==========\n"); 890*3f1979aaSAndroid Build Coastguard Worker flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT; 891*3f1979aaSAndroid Build Coastguard Worker flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 ); 892*3f1979aaSAndroid Build Coastguard Worker flagsC = flagsB | ( k == 2 ? PFFASTCONV_CPLX_SINGLE_FFT : 0 ); 893*3f1979aaSAndroid Build Coastguard Worker testOutLen = 0; 894*3f1979aaSAndroid Build Coastguard Worker printDbg = 0; 895*3f1979aaSAndroid Build Coastguard Worker printSpeed = 1; 896*3f1979aaSAndroid Build Coastguard Worker if (!slowTest) { 897*3f1979aaSAndroid Build Coastguard Worker result |= test( 32, flagsC, testOutLen, printDbg, printSpeed); 898*3f1979aaSAndroid Build Coastguard Worker result |= test( 32+ 16, flagsC, testOutLen, printDbg, printSpeed); 899*3f1979aaSAndroid Build Coastguard Worker result |= test( 64, flagsC, testOutLen, printDbg, printSpeed); 900*3f1979aaSAndroid Build Coastguard Worker result |= test( 64+ 32, flagsC, testOutLen, printDbg, printSpeed); 901*3f1979aaSAndroid Build Coastguard Worker result |= test(128, flagsC, testOutLen, printDbg, printSpeed); 902*3f1979aaSAndroid Build Coastguard Worker } 903*3f1979aaSAndroid Build Coastguard Worker if (!quickTest) { 904*3f1979aaSAndroid Build Coastguard Worker result |= test(128+ 64, flagsC, testOutLen, printDbg, printSpeed); 905*3f1979aaSAndroid Build Coastguard Worker result |= test(256, flagsC, testOutLen, printDbg, printSpeed); 906*3f1979aaSAndroid Build Coastguard Worker result |= test(256+128, flagsC, testOutLen, printDbg, printSpeed); 907*3f1979aaSAndroid Build Coastguard Worker result |= test(512, flagsC, testOutLen, printDbg, printSpeed); 908*3f1979aaSAndroid Build Coastguard Worker result |= test(1024, flagsC, testOutLen, printDbg, printSpeed); 909*3f1979aaSAndroid Build Coastguard Worker } 910*3f1979aaSAndroid Build Coastguard Worker } 911*3f1979aaSAndroid Build Coastguard Worker } 912*3f1979aaSAndroid Build Coastguard Worker 913*3f1979aaSAndroid Build Coastguard Worker return result; 914*3f1979aaSAndroid Build Coastguard Worker } 915*3f1979aaSAndroid Build Coastguard Worker 916