xref: /aosp_15_r20/external/libaom/aom_dsp/fft.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/aom_dsp_common.h"
13*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/fft_common.h"
14*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_dsp_rtcd.h"
15*77c1e3ccSAndroid Build Coastguard Worker 
// Writes the transpose of the n x n row-major matrix A into B, i.e.
// B[y * n + x] = A[x * n + y] for all 0 <= x, y < n.
// A is only read and B is only written. B must not alias A: an in-place call
// would overwrite elements before they have been read.
static inline void simple_transpose(const float *A, float *B, int n) {
  for (int y = 0; y < n; y++) {
    for (int x = 0; x < n; x++) {
      B[y * n + x] = A[x * n + y];
    }
  }
}
23*77c1e3ccSAndroid Build Coastguard Worker 
// The 1d transform is real to complex and packs the complex results in
// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
// components, followed by the n/2 - 1 imaginary components). After the
// transform is done on the rows, the first n/2 + 1 columns are real, and
// the remaining are the imaginary components. After the transform on the
// columns, the region of [0, n/2]x[0, n/2] contains the real part of
// fft of the real columns. The real part of the 2d fft also includes the
// imaginary part of transformed imaginary columns. This function assembles
// the correct outputs while putting the real and imaginary components
// next to each other.
//
// col_fft: n x n packed result described above (read only).
// output:  receives interleaved pairs; element (row, col) is stored at
//          output[2 * (row * n + col)] (real) and the following float
//          (imaginary). Only columns 0..n/2 are written: rows 0..n/2 directly,
//          and rows n/2+1..n-1 as the mirror (n - y) of rows 0 < y < n/2.
static inline void unpack_2d_output(const float *col_fft, float *output,
                                    int n) {
  const int half = n / 2;
  for (int y = 0; y <= half; ++y) {
    const int y2 = y + half;
    // True only for 0 < y < n/2, i.e. when row y2 holds imaginary parts.
    const int y_extra = y2 > half && y2 < n;

    for (int x = 0; x <= half; ++x) {
      const int x2 = x + half;
      // True only for 0 < x < n/2, i.e. when column x2 holds imaginary parts.
      const int x_extra = x2 > half && x2 < n;
      // The guards also keep the y2/x2 reads inside the n x n buffer.
      output[2 * (y * n + x)] =
          col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
      output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
                                    (x_extra ? col_fft[y * n + x2] : 0);
      if (y_extra) {
        // Mirrored row n - y: same terms with the sign of the y2 terms
        // flipped. y_extra is known to be true here.
        output[2 * ((n - y) * n + x)] =
            col_fft[y * n + x] + (x_extra ? col_fft[y2 * n + x2] : 0);
        output[2 * ((n - y) * n + x) + 1] =
            -col_fft[y2 * n + x] + (x_extra ? col_fft[y * n + x2] : 0);
      }
    }
  }
}
58*77c1e3ccSAndroid Build Coastguard Worker 
// Generic 2D forward FFT driver built from a 1D transform.
//
// Steps, in order:
//   1. tform on `input`, writing `output`, once per group of vec_size columns.
//   2. transpose `output` into `temp`.
//   3. tform on `temp`, writing `output`, once per group of vec_size columns.
//   4. transpose `output` into `temp`.
//   5. unpack `temp` into `output`.
//
// input:     source samples (read only).
// temp:      scratch buffer; written by both transposes.
// output:    scratch during steps 1-4, final result after unpack.
// n:         transform size; also passed as the third argument of tform
//            (presumably the row stride -- confirm against the 1D kernels).
// tform:     1D transform applied at column offsets 0, vec_size, 2*vec_size...
// transpose: n x n transpose routine.
// unpack:    final reordering routine.
// vec_size:  column step between tform calls. Must be positive, otherwise the
//            loops below never advance.
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
                    aom_fft_unpack_func_t unpack, int vec_size) {
  for (int x = 0; x < n; x += vec_size) {
    tform(input + x, output + x, n);
  }
  transpose(output, temp, n);

  for (int x = 0; x < n; x += vec_size) {
    tform(temp + x, output + x, n);
  }
  transpose(output, temp, n);

  unpack(temp, output, n);
}
74*77c1e3ccSAndroid Build Coastguard Worker 
// Scalar "store" primitive passed to the GEN_FFT_*/GEN_IFFT_* macros:
// writes input to *output.
static inline void store_float(float *output, float input) { *output = input; }
// Scalar "add" primitive passed to the GEN_FFT_*/GEN_IFFT_* macros:
// returns a + b.
static inline float add_float(float a, float b) { return a + b; }
// Scalar "subtract" primitive passed to the GEN_FFT_*/GEN_IFFT_* macros:
// returns a - b.
static inline float sub_float(float a, float b) { return a - b; }
// Scalar "multiply" primitive passed to the GEN_FFT_*/GEN_IFFT_* macros:
// returns a * b.
static inline float mul_float(float a, float b) { return a * b; }
79*77c1e3ccSAndroid Build Coastguard Worker 
80*77c1e3ccSAndroid Build Coastguard Worker GEN_FFT_2(void, float, float, float, *, store_float)
81*77c1e3ccSAndroid Build Coastguard Worker GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
82*77c1e3ccSAndroid Build Coastguard Worker           sub_float)
83*77c1e3ccSAndroid Build Coastguard Worker GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
84*77c1e3ccSAndroid Build Coastguard Worker           sub_float, mul_float)
85*77c1e3ccSAndroid Build Coastguard Worker GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
86*77c1e3ccSAndroid Build Coastguard Worker            sub_float, mul_float)
87*77c1e3ccSAndroid Build Coastguard Worker GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
88*77c1e3ccSAndroid Build Coastguard Worker            sub_float, mul_float)
89*77c1e3ccSAndroid Build Coastguard Worker 
aom_fft2x2_float_c(const float * input,float * temp,float * output)90*77c1e3ccSAndroid Build Coastguard Worker void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
91*77c1e3ccSAndroid Build Coastguard Worker   aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
92*77c1e3ccSAndroid Build Coastguard Worker                  unpack_2d_output, 1);
93*77c1e3ccSAndroid Build Coastguard Worker }
94*77c1e3ccSAndroid Build Coastguard Worker 
aom_fft4x4_float_c(const float * input,float * temp,float * output)95*77c1e3ccSAndroid Build Coastguard Worker void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
96*77c1e3ccSAndroid Build Coastguard Worker   aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
97*77c1e3ccSAndroid Build Coastguard Worker                  unpack_2d_output, 1);
98*77c1e3ccSAndroid Build Coastguard Worker }
99*77c1e3ccSAndroid Build Coastguard Worker 
aom_fft8x8_float_c(const float * input,float * temp,float * output)100*77c1e3ccSAndroid Build Coastguard Worker void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
101*77c1e3ccSAndroid Build Coastguard Worker   aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
102*77c1e3ccSAndroid Build Coastguard Worker                  unpack_2d_output, 1);
103*77c1e3ccSAndroid Build Coastguard Worker }
104*77c1e3ccSAndroid Build Coastguard Worker 
aom_fft16x16_float_c(const float * input,float * temp,float * output)105*77c1e3ccSAndroid Build Coastguard Worker void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
106*77c1e3ccSAndroid Build Coastguard Worker   aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
107*77c1e3ccSAndroid Build Coastguard Worker                  unpack_2d_output, 1);
108*77c1e3ccSAndroid Build Coastguard Worker }
109*77c1e3ccSAndroid Build Coastguard Worker 
aom_fft32x32_float_c(const float * input,float * temp,float * output)110*77c1e3ccSAndroid Build Coastguard Worker void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
111*77c1e3ccSAndroid Build Coastguard Worker   aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
112*77c1e3ccSAndroid Build Coastguard Worker                  unpack_2d_output, 1);
113*77c1e3ccSAndroid Build Coastguard Worker }
114*77c1e3ccSAndroid Build Coastguard Worker 
// Generic 2D inverse FFT driver.
//
// input:      interleaved complex data; element (y, x) is read as
//             input[2 * (y * n + x)] (real) and the following float
//             (imaginary). Only columns 0..n/2 are read.
// temp:       n x n scratch buffer.
// output:     n x n buffer; used as scratch throughout and holds the final
//             result, written by the closing transpose(temp, output, n).
// n:          transform size; also passed as the third argument of the 1D
//             kernels (presumably the row stride -- confirm).
// fft_single: forward 1D kernel run one column at a time on columns
//             2..vec_size-1, i.e. those not covered by a full vec_size step.
// fft_multi:  forward 1D kernel run on columns AOMMAX(2, vec_size)..n-1 in
//             steps of vec_size.
// ifft_multi: inverse 1D kernel, used for columns 0/1 at the start and for
//             all columns in the final pass.
// transpose:  n x n transpose routine.
// vec_size:   column step between kernel calls. Must be positive, otherwise
//             the stepping loops never advance.
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
                     aom_fft_1d_func_t ifft_multi,
                     aom_fft_transpose_func_t transpose, int vec_size) {
  // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
  // and get real outputs.
  // They are packed into output columns 0 and 1: real parts of rows 0..n/2
  // first, then imaginary parts of rows 1..n/2-1.
  for (int y = 0; y <= n / 2; ++y) {
    output[y * n] = input[2 * y * n];
    output[y * n + 1] = input[2 * (y * n + n / 2)];
  }
  for (int y = n / 2 + 1; y < n; ++y) {
    output[y * n] = input[2 * (y - n / 2) * n + 1];
    output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
  }

  // Runs twice when vec_size == 1 and once when vec_size >= 2.
  for (int i = 0; i < 2; i += vec_size) {
    ifft_multi(output + i, temp + i, n);
  }

  // For the other columns, since we don't have a full ifft for complex inputs
  // we have to split them into the real and imaginary counterparts.
  // Pack the real component, then the imaginary components.
  // Real parts of input columns 1..n/2-1 go to output columns 2..n/2, the
  // imaginary parts to output columns n/2+1..n-1.
  for (int y = 0; y < n; ++y) {
    for (int x = 1; x < n / 2; ++x) {
      output[y * n + (x + 1)] = input[2 * (y * n + x)];
    }
    for (int x = 1; x < n / 2; ++x) {
      output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
    }
  }
  for (int y = 2; y < vec_size; y++) {
    fft_single(output + y, temp + y, n);
  }
  // This is the part that can be sped up with SIMD
  for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
    fft_multi(output + y, temp + y, n);
  }

  // Put the 0 and n/2 th results in the correct place.
  for (int x = 0; x < n; ++x) {
    output[x] = temp[x * n];
    output[(n / 2) * n + x] = temp[x * n + 1];
  }
  // This rearranges and transposes.
  for (int y = 1; y < n / 2; ++y) {
    // Fill in the real columns
    for (int x = 0; x <= n / 2; ++x) {
      output[x + y * n] =
          temp[(y + 1) + x * n] +
          ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
    }
    for (int x = n / 2 + 1; x < n; ++x) {
      output[x + y * n] = temp[(y + 1) + (n - x) * n] -
                          temp[(y + n / 2) + ((n - x) + n / 2) * n];
    }
    // Fill in the imag columns
    for (int x = 0; x <= n / 2; ++x) {
      output[x + (y + n / 2) * n] =
          temp[(y + n / 2) + x * n] -
          ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
    }
    for (int x = n / 2 + 1; x < n; ++x) {
      output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
                                    temp[(y + n / 2) + (n - x) * n];
    }
  }
  for (int y = 0; y < n; y += vec_size) {
    ifft_multi(output + y, temp + y, n);
  }
  transpose(temp, output, n);
}
186*77c1e3ccSAndroid Build Coastguard Worker 
187*77c1e3ccSAndroid Build Coastguard Worker GEN_IFFT_2(static void, float, float, float, *, store_float)
188*77c1e3ccSAndroid Build Coastguard Worker GEN_IFFT_4(static void, float, float, float, *, store_float, (float), add_float,
189*77c1e3ccSAndroid Build Coastguard Worker            sub_float)
190*77c1e3ccSAndroid Build Coastguard Worker GEN_IFFT_8(static void, float, float, float, *, store_float, (float), add_float,
191*77c1e3ccSAndroid Build Coastguard Worker            sub_float, mul_float)
192*77c1e3ccSAndroid Build Coastguard Worker GEN_IFFT_16(static void, float, float, float, *, store_float, (float),
193*77c1e3ccSAndroid Build Coastguard Worker             add_float, sub_float, mul_float)
194*77c1e3ccSAndroid Build Coastguard Worker GEN_IFFT_32(static void, float, float, float, *, store_float, (float),
195*77c1e3ccSAndroid Build Coastguard Worker             add_float, sub_float, mul_float)
196*77c1e3ccSAndroid Build Coastguard Worker 
aom_ifft2x2_float_c(const float * input,float * temp,float * output)197*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
198*77c1e3ccSAndroid Build Coastguard Worker   aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
199*77c1e3ccSAndroid Build Coastguard Worker                   aom_ifft1d_2_float, simple_transpose, 1);
200*77c1e3ccSAndroid Build Coastguard Worker }
201*77c1e3ccSAndroid Build Coastguard Worker 
aom_ifft4x4_float_c(const float * input,float * temp,float * output)202*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
203*77c1e3ccSAndroid Build Coastguard Worker   aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
204*77c1e3ccSAndroid Build Coastguard Worker                   aom_ifft1d_4_float, simple_transpose, 1);
205*77c1e3ccSAndroid Build Coastguard Worker }
206*77c1e3ccSAndroid Build Coastguard Worker 
aom_ifft8x8_float_c(const float * input,float * temp,float * output)207*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
208*77c1e3ccSAndroid Build Coastguard Worker   aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
209*77c1e3ccSAndroid Build Coastguard Worker                   aom_ifft1d_8_float, simple_transpose, 1);
210*77c1e3ccSAndroid Build Coastguard Worker }
211*77c1e3ccSAndroid Build Coastguard Worker 
aom_ifft16x16_float_c(const float * input,float * temp,float * output)212*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
213*77c1e3ccSAndroid Build Coastguard Worker   aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
214*77c1e3ccSAndroid Build Coastguard Worker                   aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
215*77c1e3ccSAndroid Build Coastguard Worker }
216*77c1e3ccSAndroid Build Coastguard Worker 
aom_ifft32x32_float_c(const float * input,float * temp,float * output)217*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
218*77c1e3ccSAndroid Build Coastguard Worker   aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
219*77c1e3ccSAndroid Build Coastguard Worker                   aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
220*77c1e3ccSAndroid Build Coastguard Worker }
221