xref: /aosp_15_r20/external/pffft/pffastconv.c (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1 /*
2   Copyright (c) 2019  Hayati Ayguen ( [email protected] )
3  */
4 
5 #include "pffastconv.h"
6 #include "pffft.h"
7 
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <math.h>
12 #include <assert.h>
13 #include <string.h>
14 
15 #define FASTCONV_DBG_OUT  0
16 
17 
18 /* detect compiler flavour */
19 #if defined(_MSC_VER)
20 #  define RESTRICT __restrict
21 #pragma warning( disable : 4244 4305 4204 4456 )
22 #elif defined(__GNUC__)
23 #  define RESTRICT __restrict
24 #endif
25 
26 
pffastconv_malloc(size_t nb_bytes)27 void *pffastconv_malloc(size_t nb_bytes)
28 {
29   return pffft_aligned_malloc(nb_bytes);
30 }
31 
pffastconv_free(void * p)32 void pffastconv_free(void *p)
33 {
34   pffft_aligned_free(p);
35 }
36 
pffastconv_simd_size()37 int pffastconv_simd_size()
38 {
39   return pffft_simd_size();
40 }
41 
42 
43 
44 struct PFFASTCONV_Setup
45 {
46   float * Xt;      /* input == x in time domain - copy for alignment */
47   float * Xf;      /* input == X in freq domain */
48   float * Hf;      /* filterCoeffs == H in freq domain */
49   float * Mf;      /* input * filterCoeffs in freq domain */
50   PFFFT_Setup *st;
51   int filterLen;   /* convolution length */
52   int Nfft;        /* FFT/block length */
53   int flags;
54   float scale;
55 };
56 
57 
pffastconv_new_setup(const float * filterCoeffs,int filterLen,int * blockLen,int flags)58 PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags )
59 {
60   PFFASTCONV_Setup * s = NULL;
61   const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1;
62   const int minFftLen = 2*pffft_simd_size()*pffft_simd_size();
63   int i, Nfft = 2 * pffft_next_power_of_two(filterLen -1);
64 #if FASTCONV_DBG_OUT
65   const int iOldBlkLen = *blockLen;
66 #endif
67 
68   if ( Nfft < minFftLen )
69     Nfft = minFftLen;
70 
71   if ( flags & PFFASTCONV_CPLX_FILTER )
72     return NULL;
73 
74   s = pffastconv_malloc( sizeof(struct PFFASTCONV_Setup) );
75 
76   if ( *blockLen > Nfft ) {
77     Nfft = *blockLen;
78     Nfft = pffft_next_power_of_two(Nfft);
79   }
80   *blockLen = Nfft;  /* this is in (complex) samples */
81 
82   Nfft *= cplxFactor;
83 
84   if ( (flags & PFFASTCONV_DIRECT_INP) && !(flags & PFFASTCONV_CPLX_INP_OUT) )
85     s->Xt = NULL;
86   else
87     s->Xt = pffastconv_malloc((unsigned)Nfft * sizeof(float));
88   s->Xf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
89   s->Hf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
90   s->Mf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
91   s->st = pffft_new_setup(Nfft, PFFFT_REAL);  /* with complex: we do 2 x fft() */
92   s->filterLen = filterLen;        /* filterLen == convolution length == length of impulse response */
93   if ( cplxFactor == 2 )
94     s->filterLen = 2 * filterLen - 1;
95   s->Nfft = Nfft;  /* FFT/block length */
96   s->flags = flags;
97   s->scale = (float)( 1.0 / Nfft );
98 
99   memset( s->Xt, 0, (unsigned)Nfft * sizeof(float) );
100   if ( flags & PFFASTCONV_CORRELATION ) {
101     for ( i = 0; i < filterLen; ++i )
102       s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ i ];
103   } else {
104     for ( i = 0; i < filterLen; ++i )
105       s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ filterLen - 1 - i ];
106   }
107 
108   pffft_transform(s->st, s->Xt, s->Hf, /* tmp = */ s->Mf, PFFFT_FORWARD);
109 
110 #if FASTCONV_DBG_OUT
111   printf("\n  fastConvSetup(filterLen = %d, blockLen %d) --> blockLen %d, OutLen = %d\n"
112     , filterLen, iOldBlkLen, *blockLen, Nfft - filterLen +1 );
113 #endif
114 
115   return s;
116 }
117 
118 
pffastconv_destroy_setup(PFFASTCONV_Setup * s)119 void pffastconv_destroy_setup( PFFASTCONV_Setup * s )
120 {
121   if (!s)
122     return;
123   pffft_destroy_setup(s->st);
124   pffastconv_free(s->Mf);
125   pffastconv_free(s->Hf);
126   pffastconv_free(s->Xf);
127   if ( s->Xt )
128     pffastconv_free(s->Xt);
129   pffastconv_free(s);
130 }
131 
132 
pffastconv_apply(PFFASTCONV_Setup * s,const float * input_,int cplxInputLen,float * output_,int applyFlush)133 int pffastconv_apply(PFFASTCONV_Setup * s, const float *input_, int cplxInputLen, float *output_, int applyFlush)
134 {
135   const float * RESTRICT X = input_;
136   float * RESTRICT Y = output_;
137   const int Nfft = s->Nfft;
138   const int filterLen = s->filterLen;
139   const int flags = s->flags;
140   const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1;
141   const int inputLen = cplxFactor * cplxInputLen;
142   int inpOff, procLen, numOut = 0, j, part, cplxOff;
143 
144   /* applyFlush != 0:
145    *     inputLen - inpOff -filterLen + 1 > 0
146    * <=> inputLen -filterLen + 1 > inpOff
147    * <=> inpOff < inputLen -filterLen + 1
148    *
149    * applyFlush == 0:
150    *     inputLen - inpOff >= Nfft
151    * <=> inputLen - Nfft >= inpOff
152    * <=> inpOff <= inputLen - Nfft
153    * <=> inpOff < inputLen - Nfft + 1
154    */
155 
156   if ( cplxFactor == 2 )
157   {
158     const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1);
159 #if 0
160     printf( "*** inputLen %d, filterLen %d, Nfft %d => maxOff %d\n", inputLen, filterLen, Nfft, maxOff);
161 #endif
162     for ( inpOff = 0; inpOff < maxOff; inpOff += numOut )
163     {
164       procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff);
165       numOut = ( procLen - filterLen + 1 ) & ( ~1 );
166       if (!numOut)
167         break;
168 #if 0
169       if (!inpOff)
170         printf("*** inpOff = %d, numOut = %d\n", inpOff, numOut);
171       if (inpOff + filterLen + 2 >= maxOff )
172         printf("*** inpOff = %d, inpOff + numOut = %d\n", inpOff, inpOff + numOut);
173 #endif
174 
175       if ( flags & PFFASTCONV_DIRECT_INP )
176       {
177         pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
178       }
179       else
180       {
181         memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) );
182         if ( procLen < Nfft )
183           memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
184 
185         pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
186       }
187 
188       pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale);
189 
190       if ( flags & PFFASTCONV_DIRECT_OUT )
191       {
192         pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD);
193       }
194       else
195       {
196         pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
197         memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) );
198       }
199     }
200     return inpOff / cplxFactor;
201   }
202   else
203   {
204     const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1);
205     const int numParts = (flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1;
206 
207     for ( inpOff = 0; inpOff < maxOff; inpOff += numOut )
208     {
209       procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff);
210       numOut = procLen - filterLen + 1;
211 
212       for ( part = 0; part < numParts; ++part )  /* iterate per real/imag component */
213       {
214 
215         if ( flags & PFFASTCONV_CPLX_INP_OUT )
216         {
217           cplxOff = 2 * inpOff + part;
218           for ( j = 0; j < procLen; ++j )
219             s->Xt[j] = X[cplxOff + 2 * j];
220           if ( procLen < Nfft )
221             memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
222 
223           pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
224         }
225         else if ( flags & PFFASTCONV_DIRECT_INP )
226         {
227           pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
228         }
229         else
230         {
231           memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) );
232           if ( procLen < Nfft )
233             memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
234 
235           pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
236         }
237 
238         pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale);
239 
240         if ( flags & PFFASTCONV_CPLX_INP_OUT )
241         {
242           pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
243 
244           cplxOff = 2 * inpOff + part;
245           for ( j = 0; j < numOut; ++j )
246             Y[ cplxOff + 2 * j ] = s->Xf[j];
247         }
248         else if ( flags & PFFASTCONV_DIRECT_OUT )
249         {
250           pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD);
251         }
252         else
253         {
254           pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
255           memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) );
256         }
257 
258       }
259     }
260 
261     return inpOff;
262   }
263 }
264 
265