xref: /aosp_15_r20/external/libaom/aom_dsp/fft_common.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #ifndef AOM_AOM_DSP_FFT_COMMON_H_
13*77c1e3ccSAndroid Build Coastguard Worker #define AOM_AOM_DSP_FFT_COMMON_H_
14*77c1e3ccSAndroid Build Coastguard Worker 
15*77c1e3ccSAndroid Build Coastguard Worker #ifdef __cplusplus
16*77c1e3ccSAndroid Build Coastguard Worker extern "C" {
17*77c1e3ccSAndroid Build Coastguard Worker #endif
18*77c1e3ccSAndroid Build Coastguard Worker 
/*!\brief A function pointer for computing 1d fft and ifft.
 *
 * The function will point to an implementation for a specific transform size,
 * and may perform the transforms using vectorized instructions.
 *
 * For a non-vectorized forward transform of size n, the input and output
 * buffers will be size n. The output takes advantage of conjugate symmetry and
 * packs the results as: [r_0, r_1, ..., r_{n/2}, i_1, ..., i_{n/2-1}], where
 * (r_{j}, i_{j}) is the complex output for index j.
 *
 * An inverse transform will assume that the complex "input" is packed
 * similarly. Its output will be real.
 *
 * Non-vectorized transforms (e.g., on a single row) would use a stride = 1.
 *
 * Vectorized implementations are parallelized along the columns so that the fft
 * can be performed on multiple columns at a time. In such cases the data block
 * for input and output is typically square (n x n) and the stride will
 * correspond to the spacing between rows. At minimum, the input size must be
 * n x simd_vector_length.
 *
 * \param[in]  input   Input buffer. See above for size restrictions.
 * \param[out] output  Output buffer. See above for size restrictions.
 * \param[in]  stride  The spacing, in number of elements, between consecutive
 *                     rows (vectorized case) or consecutive elements
 *                     (non-vectorized case, where stride = 1).
 */
typedef void (*aom_fft_1d_func_t)(const float *input, float *output,
                                  int stride);
47*77c1e3ccSAndroid Build Coastguard Worker 
// Forward, non-vectorized 1D transforms on float data. They are declared here
// because some of the vectorized implementations also use them. The number in
// each name is the transform size; all of them share the
// (input, output, stride) parameter list.
void aom_fft1d_2_float(const float *input, float *output, int stride);
void aom_fft1d_4_float(const float *input, float *output, int stride);
void aom_fft1d_8_float(const float *input, float *output, int stride);
void aom_fft1d_16_float(const float *input, float *output, int stride);
void aom_fft1d_32_float(const float *input, float *output, int stride);
55*77c1e3ccSAndroid Build Coastguard Worker 
/*!\brief Function pointer for transposing a matrix of floats.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_transpose_func_t)(const float *input, float *output,
                                         int n);
64*77c1e3ccSAndroid Build Coastguard Worker 
/*!\brief Function pointer for re-arranging intermediate 2d transform results.
 *
 * After re-arrangement, the real and imaginary components will be packed
 * tightly next to each other.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size 2 x n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_unpack_func_t)(const float *input, float *output, int n);
75*77c1e3ccSAndroid Build Coastguard Worker 
76*77c1e3ccSAndroid Build Coastguard Worker /*!\brief Performs a 2d fft with the given functions.
77*77c1e3ccSAndroid Build Coastguard Worker  *
78*77c1e3ccSAndroid Build Coastguard Worker  * This generator function allows for multiple different implementations of 2d
79*77c1e3ccSAndroid Build Coastguard Worker  * fft with different vector operations, without having to redefine the main
80*77c1e3ccSAndroid Build Coastguard Worker  * body multiple times.
81*77c1e3ccSAndroid Build Coastguard Worker  *
82*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  input     Input buffer to run the transform on (size n x n)
83*77c1e3ccSAndroid Build Coastguard Worker  * \param[out] temp      Working buffer for computing the transform (size n x n)
84*77c1e3ccSAndroid Build Coastguard Worker  * \param[out] output    Output buffer (size 2 x n x n)
85*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  tform     Forward transform function
86*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  transpose Transpose function (for n x n matrix)
87*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  unpack    Unpack function used to massage outputs to correct form
88*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  vec_size  Vector size (the transform is done vec_size units at
89*77c1e3ccSAndroid Build Coastguard Worker  *                       a time)
90*77c1e3ccSAndroid Build Coastguard Worker  */
91*77c1e3ccSAndroid Build Coastguard Worker void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
92*77c1e3ccSAndroid Build Coastguard Worker                     aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
93*77c1e3ccSAndroid Build Coastguard Worker                     aom_fft_unpack_func_t unpack, int vec_size);
94*77c1e3ccSAndroid Build Coastguard Worker 
95*77c1e3ccSAndroid Build Coastguard Worker /*!\brief Perform a 2d inverse fft with the given helper functions
96*77c1e3ccSAndroid Build Coastguard Worker  *
97*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  input      Input buffer to run the transform on (size 2 x n x n)
98*77c1e3ccSAndroid Build Coastguard Worker  * \param[out] temp       Working buffer for computations (size 2 x n x n)
99*77c1e3ccSAndroid Build Coastguard Worker  * \param[out] output     Output buffer (size n x n)
100*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  fft_single Forward transform function (non vectorized)
101*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  fft_multi  Forward transform function (vectorized)
102*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  ifft_multi Inverse transform function (vectorized)
103*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  transpose  Transpose function (for n x n matrix)
104*77c1e3ccSAndroid Build Coastguard Worker  * \param[in]  vec_size   Vector size (the transform is done vec_size
105*77c1e3ccSAndroid Build Coastguard Worker  *                        units at a time)
106*77c1e3ccSAndroid Build Coastguard Worker  */
107*77c1e3ccSAndroid Build Coastguard Worker void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
108*77c1e3ccSAndroid Build Coastguard Worker                      aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
109*77c1e3ccSAndroid Build Coastguard Worker                      aom_fft_1d_func_t ifft_multi,
110*77c1e3ccSAndroid Build Coastguard Worker                      aom_fft_transpose_func_t transpose, int vec_size);
111*77c1e3ccSAndroid Build Coastguard Worker #ifdef __cplusplus
112*77c1e3ccSAndroid Build Coastguard Worker }
113*77c1e3ccSAndroid Build Coastguard Worker #endif
114*77c1e3ccSAndroid Build Coastguard Worker 
// The macros below generate the 1D fft/ifft function definitions. Each macro
// is parameterized on the element type and on the load/store/arithmetic
// operations, so the same body can be instantiated for plain scalar types as
// well as for different simd vector intrinsic types.
117*77c1e3ccSAndroid Build Coastguard Worker 
/* Generates the definition of aom_fft1d_2_<suffix>, a 2-point forward 1D fft.
 *
 * The generated function loads one value from input + 0 * stride and one from
 * input + 1 * stride, then stores their sum at output + 0 * stride and their
 * difference (first minus second) at output + 1 * stride.
 *
 * Note that the two values are combined with the infix + and - operators
 * directly, so T_VEC must be a type on which those operators are valid; no
 * add/sub operations are passed to this generator.
 *
 * Macro parameters:
 *   ret     Return type (and any leading specifiers) of the generated function.
 *   suffix  Token pasted after "aom_fft1d_2_" to form the function name.
 *   T       Element type of the input and output buffers.
 *   T_VEC   Type of the value produced by load and accepted by store.
 *   load    Operation applied as load(ptr) to read a T_VEC from a const T *.
 *   store   Operation applied as store(ptr, value) to write a T_VEC to a T *.
 */
#define GEN_FFT_2(ret, suffix, T, T_VEC, load, store)               \
  ret aom_fft1d_2_##suffix(const T *input, T *output, int stride) { \
    const T_VEC i0 = load(input + 0 * stride);                      \
    const T_VEC i1 = load(input + 1 * stride);                      \
    store(output + 0 * stride, i0 + i1);                            \
    store(output + 1 * stride, i0 - i1);                            \
  }
125*77c1e3ccSAndroid Build Coastguard Worker 
/* Generates the definition of aom_fft1d_4_<suffix>, a 4-point forward 1D fft
 * of real data.
 *
 * The generated function loads four values from input + k * stride
 * (k = 0..3) and stores four results at output + k * stride:
 *   k = 0: (i0 + i2) + (i1 + i3)
 *   k = 1:  i0 - i2
 *   k = 2: (i0 + i2) - (i1 + i3)
 *   k = 3:  0 - (i1 - i3)
 * which is the packed layout [r_0, r_1, r_2, i_1] of the 4-point transform.
 *
 * Macro parameters:
 *   ret       Return type (and any leading specifiers) of the function.
 *   suffix    Token pasted after "aom_fft1d_4_" to form the function name.
 *   T         Element type of the input and output buffers.
 *   T_VEC     Type of the values being loaded, combined and stored.
 *   load      Operation applied as load(ptr) to read a T_VEC.
 *   store     Operation applied as store(ptr, value) to write a T_VEC.
 *   constant  Operation applied as constant(float_literal) to make a T_VEC.
 *   add, sub  Binary operations on T_VEC, applied as add(a, b) / sub(a, b).
 */
#define GEN_FFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) \
  ret aom_fft1d_4_##suffix(const T *input, T *output, int stride) {       \
    const T_VEC kWeight0 = constant(0.0f);                                \
    const T_VEC i0 = load(input + 0 * stride);                            \
    const T_VEC i1 = load(input + 1 * stride);                            \
    const T_VEC i2 = load(input + 2 * stride);                            \
    const T_VEC i3 = load(input + 3 * stride);                            \
    const T_VEC w0 = add(i0, i2);                                         \
    const T_VEC w1 = sub(i0, i2);                                         \
    const T_VEC w2 = add(i1, i3);                                         \
    const T_VEC w3 = sub(i1, i3);                                         \
    store(output + 0 * stride, add(w0, w2));                              \
    store(output + 1 * stride, w1);                                       \
    store(output + 2 * stride, sub(w0, w2));                              \
    store(output + 3 * stride, sub(kWeight0, w3));                        \
  }
142*77c1e3ccSAndroid Build Coastguard Worker 
/* Generates the definition of aom_fft1d_8_<suffix>, an 8-point forward 1D fft
 * of real data.
 *
 * The generated function loads eight values from input + k * stride
 * (k = 0..7) and stores eight results at output + k * stride in the packed
 * layout [r_0, r_1, r_2, r_3, r_4, i_1, i_2, i_3], where (r_j, i_j) is the
 * complex output for index j.
 *
 * kWeight0 is zero and is only used to negate a value as sub(kWeight0, x).
 * kWeight2 (0.707107f) is approximately sqrt(2) / 2.
 *
 * The arithmetic is deliberately left as fully nested expressions; do not
 * reorder or regroup it without checking that results stay bit-identical.
 *
 * Macro parameters:
 *   ret       Return type (and any leading specifiers) of the function.
 *   suffix    Token pasted after "aom_fft1d_8_" to form the function name.
 *   T         Element type of the input and output buffers.
 *   T_VEC     Type of the values being loaded, combined and stored.
 *   load      Operation applied as load(ptr) to read a T_VEC.
 *   store     Operation applied as store(ptr, value) to write a T_VEC.
 *   constant  Operation applied as constant(float_literal) to make a T_VEC.
 *   add, sub, mul  Binary operations on T_VEC, applied as op(a, b).
 */
#define GEN_FFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, mul) \
  ret aom_fft1d_8_##suffix(const T *input, T *output, int stride) {            \
    const T_VEC kWeight0 = constant(0.0f);                                     \
    const T_VEC kWeight2 = constant(0.707107f);                                \
    const T_VEC i0 = load(input + 0 * stride);                                 \
    const T_VEC i1 = load(input + 1 * stride);                                 \
    const T_VEC i2 = load(input + 2 * stride);                                 \
    const T_VEC i3 = load(input + 3 * stride);                                 \
    const T_VEC i4 = load(input + 4 * stride);                                 \
    const T_VEC i5 = load(input + 5 * stride);                                 \
    const T_VEC i6 = load(input + 6 * stride);                                 \
    const T_VEC i7 = load(input + 7 * stride);                                 \
    const T_VEC w0 = add(i0, i4);                                              \
    const T_VEC w1 = sub(i0, i4);                                              \
    const T_VEC w2 = add(i2, i6);                                              \
    const T_VEC w3 = sub(i2, i6);                                              \
    const T_VEC w4 = add(w0, w2);                                              \
    const T_VEC w5 = sub(w0, w2);                                              \
    const T_VEC w7 = add(i1, i5);                                              \
    const T_VEC w8 = sub(i1, i5);                                              \
    const T_VEC w9 = add(i3, i7);                                              \
    const T_VEC w10 = sub(i3, i7);                                             \
    const T_VEC w11 = add(w7, w9);                                             \
    const T_VEC w12 = sub(w7, w9);                                             \
    store(output + 0 * stride, add(w4, w11));                                  \
    store(output + 1 * stride, add(w1, mul(kWeight2, sub(w8, w10))));          \
    store(output + 2 * stride, w5);                                            \
    store(output + 3 * stride, sub(w1, mul(kWeight2, sub(w8, w10))));          \
    store(output + 4 * stride, sub(w4, w11));                                  \
    store(output + 5 * stride,                                                 \
          sub(sub(kWeight0, w3), mul(kWeight2, add(w10, w8))));                \
    store(output + 6 * stride, sub(kWeight0, w12));                            \
    store(output + 7 * stride, sub(w3, mul(kWeight2, add(w10, w8))));          \
  }
177*77c1e3ccSAndroid Build Coastguard Worker 
/* Generates the definition of aom_fft1d_16_<suffix>, a 16-point forward 1D
 * fft of real data.
 *
 * The generated function loads sixteen values from input + k * stride
 * (k = 0..15) and stores sixteen results at output + k * stride in the packed
 * layout [r_0, ..., r_8, i_1, ..., i_7], where (r_j, i_j) is the complex
 * output for index j.
 *
 * The two-element arrays w16, w18, w35 and w37 hold intermediate complex
 * values as { real, imaginary }: element [0] only feeds the real outputs
 * (k = 1, 3, 5, 7) directly and element [1] only feeds the imaginary outputs
 * (k = 9, 11, 13, 15) directly.
 *
 * Constants:
 *   kWeight0  zero; only used to negate a value as sub(kWeight0, x).
 *   kWeight2  0.707107f, approximately sqrt(2) / 2.
 *   kWeight3  0.92388f,  approximately cos(pi / 8).
 *   kWeight4  0.382683f, approximately sin(pi / 8).
 *
 * The arithmetic is deliberately left as fully nested expressions; do not
 * reorder or regroup it without checking that results stay bit-identical.
 *
 * Macro parameters:
 *   ret       Return type (and any leading specifiers) of the function.
 *   suffix    Token pasted after "aom_fft1d_16_" to form the function name.
 *   T         Element type of the input and output buffers.
 *   T_VEC     Type of the values being loaded, combined and stored.
 *   load      Operation applied as load(ptr) to read a T_VEC.
 *   store     Operation applied as store(ptr, value) to write a T_VEC.
 *   constant  Operation applied as constant(float_literal) to make a T_VEC.
 *   add, sub, mul  Binary operations on T_VEC, applied as op(a, b).
 */
#define GEN_FFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
                   mul)                                                    \
  ret aom_fft1d_16_##suffix(const T *input, T *output, int stride) {       \
    const T_VEC kWeight0 = constant(0.0f);                                 \
    const T_VEC kWeight2 = constant(0.707107f);                            \
    const T_VEC kWeight3 = constant(0.92388f);                             \
    const T_VEC kWeight4 = constant(0.382683f);                            \
    const T_VEC i0 = load(input + 0 * stride);                             \
    const T_VEC i1 = load(input + 1 * stride);                             \
    const T_VEC i2 = load(input + 2 * stride);                             \
    const T_VEC i3 = load(input + 3 * stride);                             \
    const T_VEC i4 = load(input + 4 * stride);                             \
    const T_VEC i5 = load(input + 5 * stride);                             \
    const T_VEC i6 = load(input + 6 * stride);                             \
    const T_VEC i7 = load(input + 7 * stride);                             \
    const T_VEC i8 = load(input + 8 * stride);                             \
    const T_VEC i9 = load(input + 9 * stride);                             \
    const T_VEC i10 = load(input + 10 * stride);                           \
    const T_VEC i11 = load(input + 11 * stride);                           \
    const T_VEC i12 = load(input + 12 * stride);                           \
    const T_VEC i13 = load(input + 13 * stride);                           \
    const T_VEC i14 = load(input + 14 * stride);                           \
    const T_VEC i15 = load(input + 15 * stride);                           \
    const T_VEC w0 = add(i0, i8);                                          \
    const T_VEC w1 = sub(i0, i8);                                          \
    const T_VEC w2 = add(i4, i12);                                         \
    const T_VEC w3 = sub(i4, i12);                                         \
    const T_VEC w4 = add(w0, w2);                                          \
    const T_VEC w5 = sub(w0, w2);                                          \
    const T_VEC w7 = add(i2, i10);                                         \
    const T_VEC w8 = sub(i2, i10);                                         \
    const T_VEC w9 = add(i6, i14);                                         \
    const T_VEC w10 = sub(i6, i14);                                        \
    const T_VEC w11 = add(w7, w9);                                         \
    const T_VEC w12 = sub(w7, w9);                                         \
    const T_VEC w14 = add(w4, w11);                                        \
    const T_VEC w15 = sub(w4, w11);                                        \
    const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))),           \
                           sub(sub(kWeight0, w3),                          \
                               mul(kWeight2, add(w10, w8))) };             \
    const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))),           \
                           sub(w3, mul(kWeight2, add(w10, w8))) };         \
    const T_VEC w19 = add(i1, i9);                                         \
    const T_VEC w20 = sub(i1, i9);                                         \
    const T_VEC w21 = add(i5, i13);                                        \
    const T_VEC w22 = sub(i5, i13);                                        \
    const T_VEC w23 = add(w19, w21);                                       \
    const T_VEC w24 = sub(w19, w21);                                       \
    const T_VEC w26 = add(i3, i11);                                        \
    const T_VEC w27 = sub(i3, i11);                                        \
    const T_VEC w28 = add(i7, i15);                                        \
    const T_VEC w29 = sub(i7, i15);                                        \
    const T_VEC w30 = add(w26, w28);                                       \
    const T_VEC w31 = sub(w26, w28);                                       \
    const T_VEC w33 = add(w23, w30);                                       \
    const T_VEC w34 = sub(w23, w30);                                       \
    const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))),         \
                           sub(sub(kWeight0, w22),                         \
                               mul(kWeight2, add(w29, w27))) };            \
    const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))),         \
                           sub(w22, mul(kWeight2, add(w29, w27))) };       \
    store(output + 0 * stride, add(w14, w33));                             \
    store(output + 1 * stride,                                             \
          add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1])))); \
    store(output + 2 * stride, add(w5, mul(kWeight2, sub(w24, w31))));     \
    store(output + 3 * stride,                                             \
          add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1])))); \
    store(output + 4 * stride, w15);                                       \
    store(output + 5 * stride,                                             \
          add(w18[0], sub(sub(kWeight0, mul(kWeight4, w37[0])),            \
                          mul(kWeight3, w37[1]))));                        \
    store(output + 6 * stride, sub(w5, mul(kWeight2, sub(w24, w31))));     \
    store(output + 7 * stride,                                             \
          add(w16[0], sub(sub(kWeight0, mul(kWeight3, w35[0])),            \
                          mul(kWeight4, w35[1]))));                        \
    store(output + 8 * stride, sub(w14, w33));                             \
    store(output + 9 * stride,                                             \
          add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0])))); \
    store(output + 10 * stride,                                            \
          sub(sub(kWeight0, w12), mul(kWeight2, add(w31, w24))));          \
    store(output + 11 * stride,                                            \
          add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0])))); \
    store(output + 12 * stride, sub(kWeight0, w34));                       \
    store(output + 13 * stride,                                            \
          sub(sub(kWeight0, w18[1]),                                       \
              sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1]))));         \
    store(output + 14 * stride, sub(w12, mul(kWeight2, add(w31, w24))));   \
    store(output + 15 * stride,                                            \
          sub(sub(kWeight0, w16[1]),                                       \
              sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1]))));         \
  }
269*77c1e3ccSAndroid Build Coastguard Worker 
270*77c1e3ccSAndroid Build Coastguard Worker #define GEN_FFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub,   \
271*77c1e3ccSAndroid Build Coastguard Worker                    mul)                                                      \
272*77c1e3ccSAndroid Build Coastguard Worker   ret aom_fft1d_32_##suffix(const T *input, T *output, int stride) {         \
273*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight0 = constant(0.0f);                                   \
274*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight2 = constant(0.707107f);                              \
275*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight3 = constant(0.92388f);                               \
276*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight4 = constant(0.382683f);                              \
277*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight5 = constant(0.980785f);                              \
278*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight6 = constant(0.19509f);                               \
279*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight7 = constant(0.83147f);                               \
280*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight8 = constant(0.55557f);                               \
281*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i0 = load(input + 0 * stride);                               \
282*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i1 = load(input + 1 * stride);                               \
283*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i2 = load(input + 2 * stride);                               \
284*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i3 = load(input + 3 * stride);                               \
285*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i4 = load(input + 4 * stride);                               \
286*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i5 = load(input + 5 * stride);                               \
287*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i6 = load(input + 6 * stride);                               \
288*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i7 = load(input + 7 * stride);                               \
289*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i8 = load(input + 8 * stride);                               \
290*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i9 = load(input + 9 * stride);                               \
291*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i10 = load(input + 10 * stride);                             \
292*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i11 = load(input + 11 * stride);                             \
293*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i12 = load(input + 12 * stride);                             \
294*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i13 = load(input + 13 * stride);                             \
295*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i14 = load(input + 14 * stride);                             \
296*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i15 = load(input + 15 * stride);                             \
297*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i16 = load(input + 16 * stride);                             \
298*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i17 = load(input + 17 * stride);                             \
299*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i18 = load(input + 18 * stride);                             \
300*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i19 = load(input + 19 * stride);                             \
301*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i20 = load(input + 20 * stride);                             \
302*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i21 = load(input + 21 * stride);                             \
303*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i22 = load(input + 22 * stride);                             \
304*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i23 = load(input + 23 * stride);                             \
305*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i24 = load(input + 24 * stride);                             \
306*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i25 = load(input + 25 * stride);                             \
307*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i26 = load(input + 26 * stride);                             \
308*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i27 = load(input + 27 * stride);                             \
309*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i28 = load(input + 28 * stride);                             \
310*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i29 = load(input + 29 * stride);                             \
311*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i30 = load(input + 30 * stride);                             \
312*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i31 = load(input + 31 * stride);                             \
313*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w0 = add(i0, i16);                                           \
314*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w1 = sub(i0, i16);                                           \
315*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w2 = add(i8, i24);                                           \
316*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w3 = sub(i8, i24);                                           \
317*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w4 = add(w0, w2);                                            \
318*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w5 = sub(w0, w2);                                            \
319*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w7 = add(i4, i20);                                           \
320*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w8 = sub(i4, i20);                                           \
321*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w9 = add(i12, i28);                                          \
322*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w10 = sub(i12, i28);                                         \
323*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w11 = add(w7, w9);                                           \
324*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w12 = sub(w7, w9);                                           \
325*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w14 = add(w4, w11);                                          \
326*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w15 = sub(w4, w11);                                          \
327*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))),             \
328*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w3),                            \
329*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w10, w8))) };               \
330*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))),             \
331*77c1e3ccSAndroid Build Coastguard Worker                            sub(w3, mul(kWeight2, add(w10, w8))) };           \
332*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w19 = add(i2, i18);                                          \
333*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w20 = sub(i2, i18);                                          \
334*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w21 = add(i10, i26);                                         \
335*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w22 = sub(i10, i26);                                         \
336*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w23 = add(w19, w21);                                         \
337*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w24 = sub(w19, w21);                                         \
338*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w26 = add(i6, i22);                                          \
339*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w27 = sub(i6, i22);                                          \
340*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w28 = add(i14, i30);                                         \
341*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w29 = sub(i14, i30);                                         \
342*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w30 = add(w26, w28);                                         \
343*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w31 = sub(w26, w28);                                         \
344*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w33 = add(w23, w30);                                         \
345*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w34 = sub(w23, w30);                                         \
346*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))),           \
347*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w22),                           \
348*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w29, w27))) };              \
349*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))),           \
350*77c1e3ccSAndroid Build Coastguard Worker                            sub(w22, mul(kWeight2, add(w29, w27))) };         \
351*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w38 = add(w14, w33);                                         \
352*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w39 = sub(w14, w33);                                         \
353*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w40[2] = {                                                   \
354*77c1e3ccSAndroid Build Coastguard Worker       add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1]))),        \
355*77c1e3ccSAndroid Build Coastguard Worker       add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0])))         \
356*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
357*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w41[2] = { add(w5, mul(kWeight2, sub(w24, w31))),            \
358*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w12),                           \
359*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w31, w24))) };              \
360*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w42[2] = {                                                   \
361*77c1e3ccSAndroid Build Coastguard Worker       add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1]))),        \
362*77c1e3ccSAndroid Build Coastguard Worker       add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0])))         \
363*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
364*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w44[2] = {                                                   \
365*77c1e3ccSAndroid Build Coastguard Worker       add(w18[0],                                                            \
366*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w37[0])), mul(kWeight3, w37[1]))), \
367*77c1e3ccSAndroid Build Coastguard Worker       sub(sub(kWeight0, w18[1]),                                             \
368*77c1e3ccSAndroid Build Coastguard Worker           sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1])))                 \
369*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
370*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w45[2] = { sub(w5, mul(kWeight2, sub(w24, w31))),            \
371*77c1e3ccSAndroid Build Coastguard Worker                            sub(w12, mul(kWeight2, add(w31, w24))) };         \
372*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w46[2] = {                                                   \
373*77c1e3ccSAndroid Build Coastguard Worker       add(w16[0],                                                            \
374*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w35[0])), mul(kWeight4, w35[1]))), \
375*77c1e3ccSAndroid Build Coastguard Worker       sub(sub(kWeight0, w16[1]),                                             \
376*77c1e3ccSAndroid Build Coastguard Worker           sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1])))                 \
377*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
378*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w47 = add(i1, i17);                                          \
379*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w48 = sub(i1, i17);                                          \
380*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w49 = add(i9, i25);                                          \
381*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w50 = sub(i9, i25);                                          \
382*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w51 = add(w47, w49);                                         \
383*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w52 = sub(w47, w49);                                         \
384*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w54 = add(i5, i21);                                          \
385*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w55 = sub(i5, i21);                                          \
386*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w56 = add(i13, i29);                                         \
387*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w57 = sub(i13, i29);                                         \
388*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w58 = add(w54, w56);                                         \
389*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w59 = sub(w54, w56);                                         \
390*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w61 = add(w51, w58);                                         \
391*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w62 = sub(w51, w58);                                         \
392*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w63[2] = { add(w48, mul(kWeight2, sub(w55, w57))),           \
393*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w50),                           \
394*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w57, w55))) };              \
395*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w65[2] = { sub(w48, mul(kWeight2, sub(w55, w57))),           \
396*77c1e3ccSAndroid Build Coastguard Worker                            sub(w50, mul(kWeight2, add(w57, w55))) };         \
397*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w66 = add(i3, i19);                                          \
398*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w67 = sub(i3, i19);                                          \
399*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w68 = add(i11, i27);                                         \
400*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w69 = sub(i11, i27);                                         \
401*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w70 = add(w66, w68);                                         \
402*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w71 = sub(w66, w68);                                         \
403*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w73 = add(i7, i23);                                          \
404*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w74 = sub(i7, i23);                                          \
405*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w75 = add(i15, i31);                                         \
406*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w76 = sub(i15, i31);                                         \
407*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w77 = add(w73, w75);                                         \
408*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w78 = sub(w73, w75);                                         \
409*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w80 = add(w70, w77);                                         \
410*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w81 = sub(w70, w77);                                         \
411*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w82[2] = { add(w67, mul(kWeight2, sub(w74, w76))),           \
412*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w69),                           \
413*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w76, w74))) };              \
414*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w84[2] = { sub(w67, mul(kWeight2, sub(w74, w76))),           \
415*77c1e3ccSAndroid Build Coastguard Worker                            sub(w69, mul(kWeight2, add(w76, w74))) };         \
416*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w85 = add(w61, w80);                                         \
417*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w86 = sub(w61, w80);                                         \
418*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w87[2] = {                                                   \
419*77c1e3ccSAndroid Build Coastguard Worker       add(w63[0], add(mul(kWeight3, w82[0]), mul(kWeight4, w82[1]))),        \
420*77c1e3ccSAndroid Build Coastguard Worker       add(w63[1], sub(mul(kWeight3, w82[1]), mul(kWeight4, w82[0])))         \
421*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
422*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w88[2] = { add(w52, mul(kWeight2, sub(w71, w78))),           \
423*77c1e3ccSAndroid Build Coastguard Worker                            sub(sub(kWeight0, w59),                           \
424*77c1e3ccSAndroid Build Coastguard Worker                                mul(kWeight2, add(w78, w71))) };              \
425*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w89[2] = {                                                   \
426*77c1e3ccSAndroid Build Coastguard Worker       add(w65[0], add(mul(kWeight4, w84[0]), mul(kWeight3, w84[1]))),        \
427*77c1e3ccSAndroid Build Coastguard Worker       add(w65[1], sub(mul(kWeight4, w84[1]), mul(kWeight3, w84[0])))         \
428*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
429*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w91[2] = {                                                   \
430*77c1e3ccSAndroid Build Coastguard Worker       add(w65[0],                                                            \
431*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w84[0])), mul(kWeight3, w84[1]))), \
432*77c1e3ccSAndroid Build Coastguard Worker       sub(sub(kWeight0, w65[1]),                                             \
433*77c1e3ccSAndroid Build Coastguard Worker           sub(mul(kWeight3, w84[0]), mul(kWeight4, w84[1])))                 \
434*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
435*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w92[2] = { sub(w52, mul(kWeight2, sub(w71, w78))),           \
436*77c1e3ccSAndroid Build Coastguard Worker                            sub(w59, mul(kWeight2, add(w78, w71))) };         \
437*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w93[2] = {                                                   \
438*77c1e3ccSAndroid Build Coastguard Worker       add(w63[0],                                                            \
439*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w82[0])), mul(kWeight4, w82[1]))), \
440*77c1e3ccSAndroid Build Coastguard Worker       sub(sub(kWeight0, w63[1]),                                             \
441*77c1e3ccSAndroid Build Coastguard Worker           sub(mul(kWeight4, w82[0]), mul(kWeight3, w82[1])))                 \
442*77c1e3ccSAndroid Build Coastguard Worker     };                                                                       \
443*77c1e3ccSAndroid Build Coastguard Worker     store(output + 0 * stride, add(w38, w85));                               \
444*77c1e3ccSAndroid Build Coastguard Worker     store(output + 1 * stride,                                               \
445*77c1e3ccSAndroid Build Coastguard Worker           add(w40[0], add(mul(kWeight5, w87[0]), mul(kWeight6, w87[1]))));   \
446*77c1e3ccSAndroid Build Coastguard Worker     store(output + 2 * stride,                                               \
447*77c1e3ccSAndroid Build Coastguard Worker           add(w41[0], add(mul(kWeight3, w88[0]), mul(kWeight4, w88[1]))));   \
448*77c1e3ccSAndroid Build Coastguard Worker     store(output + 3 * stride,                                               \
449*77c1e3ccSAndroid Build Coastguard Worker           add(w42[0], add(mul(kWeight7, w89[0]), mul(kWeight8, w89[1]))));   \
450*77c1e3ccSAndroid Build Coastguard Worker     store(output + 4 * stride, add(w15, mul(kWeight2, sub(w62, w81))));      \
451*77c1e3ccSAndroid Build Coastguard Worker     store(output + 5 * stride,                                               \
452*77c1e3ccSAndroid Build Coastguard Worker           add(w44[0], add(mul(kWeight8, w91[0]), mul(kWeight7, w91[1]))));   \
453*77c1e3ccSAndroid Build Coastguard Worker     store(output + 6 * stride,                                               \
454*77c1e3ccSAndroid Build Coastguard Worker           add(w45[0], add(mul(kWeight4, w92[0]), mul(kWeight3, w92[1]))));   \
455*77c1e3ccSAndroid Build Coastguard Worker     store(output + 7 * stride,                                               \
456*77c1e3ccSAndroid Build Coastguard Worker           add(w46[0], add(mul(kWeight6, w93[0]), mul(kWeight5, w93[1]))));   \
457*77c1e3ccSAndroid Build Coastguard Worker     store(output + 8 * stride, w39);                                         \
458*77c1e3ccSAndroid Build Coastguard Worker     store(output + 9 * stride,                                               \
459*77c1e3ccSAndroid Build Coastguard Worker           add(w46[0], sub(sub(kWeight0, mul(kWeight6, w93[0])),              \
460*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight5, w93[1]))));                          \
461*77c1e3ccSAndroid Build Coastguard Worker     store(output + 10 * stride,                                              \
462*77c1e3ccSAndroid Build Coastguard Worker           add(w45[0], sub(sub(kWeight0, mul(kWeight4, w92[0])),              \
463*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight3, w92[1]))));                          \
464*77c1e3ccSAndroid Build Coastguard Worker     store(output + 11 * stride,                                              \
465*77c1e3ccSAndroid Build Coastguard Worker           add(w44[0], sub(sub(kWeight0, mul(kWeight8, w91[0])),              \
466*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight7, w91[1]))));                          \
467*77c1e3ccSAndroid Build Coastguard Worker     store(output + 12 * stride, sub(w15, mul(kWeight2, sub(w62, w81))));     \
468*77c1e3ccSAndroid Build Coastguard Worker     store(output + 13 * stride,                                              \
469*77c1e3ccSAndroid Build Coastguard Worker           add(w42[0], sub(sub(kWeight0, mul(kWeight7, w89[0])),              \
470*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight8, w89[1]))));                          \
471*77c1e3ccSAndroid Build Coastguard Worker     store(output + 14 * stride,                                              \
472*77c1e3ccSAndroid Build Coastguard Worker           add(w41[0], sub(sub(kWeight0, mul(kWeight3, w88[0])),              \
473*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight4, w88[1]))));                          \
474*77c1e3ccSAndroid Build Coastguard Worker     store(output + 15 * stride,                                              \
475*77c1e3ccSAndroid Build Coastguard Worker           add(w40[0], sub(sub(kWeight0, mul(kWeight5, w87[0])),              \
476*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight6, w87[1]))));                          \
477*77c1e3ccSAndroid Build Coastguard Worker     store(output + 16 * stride, sub(w38, w85));                              \
478*77c1e3ccSAndroid Build Coastguard Worker     store(output + 17 * stride,                                              \
479*77c1e3ccSAndroid Build Coastguard Worker           add(w40[1], sub(mul(kWeight5, w87[1]), mul(kWeight6, w87[0]))));   \
480*77c1e3ccSAndroid Build Coastguard Worker     store(output + 18 * stride,                                              \
481*77c1e3ccSAndroid Build Coastguard Worker           add(w41[1], sub(mul(kWeight3, w88[1]), mul(kWeight4, w88[0]))));   \
482*77c1e3ccSAndroid Build Coastguard Worker     store(output + 19 * stride,                                              \
483*77c1e3ccSAndroid Build Coastguard Worker           add(w42[1], sub(mul(kWeight7, w89[1]), mul(kWeight8, w89[0]))));   \
484*77c1e3ccSAndroid Build Coastguard Worker     store(output + 20 * stride,                                              \
485*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w34), mul(kWeight2, add(w81, w62))));            \
486*77c1e3ccSAndroid Build Coastguard Worker     store(output + 21 * stride,                                              \
487*77c1e3ccSAndroid Build Coastguard Worker           add(w44[1], sub(mul(kWeight8, w91[1]), mul(kWeight7, w91[0]))));   \
488*77c1e3ccSAndroid Build Coastguard Worker     store(output + 22 * stride,                                              \
489*77c1e3ccSAndroid Build Coastguard Worker           add(w45[1], sub(mul(kWeight4, w92[1]), mul(kWeight3, w92[0]))));   \
490*77c1e3ccSAndroid Build Coastguard Worker     store(output + 23 * stride,                                              \
491*77c1e3ccSAndroid Build Coastguard Worker           add(w46[1], sub(mul(kWeight6, w93[1]), mul(kWeight5, w93[0]))));   \
492*77c1e3ccSAndroid Build Coastguard Worker     store(output + 24 * stride, sub(kWeight0, w86));                         \
493*77c1e3ccSAndroid Build Coastguard Worker     store(output + 25 * stride,                                              \
494*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w46[1]),                                         \
495*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight5, w93[0]), mul(kWeight6, w93[1]))));           \
496*77c1e3ccSAndroid Build Coastguard Worker     store(output + 26 * stride,                                              \
497*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w45[1]),                                         \
498*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight3, w92[0]), mul(kWeight4, w92[1]))));           \
499*77c1e3ccSAndroid Build Coastguard Worker     store(output + 27 * stride,                                              \
500*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w44[1]),                                         \
501*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight7, w91[0]), mul(kWeight8, w91[1]))));           \
502*77c1e3ccSAndroid Build Coastguard Worker     store(output + 28 * stride, sub(w34, mul(kWeight2, add(w81, w62))));     \
503*77c1e3ccSAndroid Build Coastguard Worker     store(output + 29 * stride,                                              \
504*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w42[1]),                                         \
505*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight8, w89[0]), mul(kWeight7, w89[1]))));           \
506*77c1e3ccSAndroid Build Coastguard Worker     store(output + 30 * stride,                                              \
507*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w41[1]),                                         \
508*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight4, w88[0]), mul(kWeight3, w88[1]))));           \
509*77c1e3ccSAndroid Build Coastguard Worker     store(output + 31 * stride,                                              \
510*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, w40[1]),                                         \
511*77c1e3ccSAndroid Build Coastguard Worker               sub(mul(kWeight6, w87[0]), mul(kWeight5, w87[1]))));           \
512*77c1e3ccSAndroid Build Coastguard Worker   }
513*77c1e3ccSAndroid Build Coastguard Worker 
/*
 * GEN_IFFT_2 expands to the definition of aom_ifft1d_2_<suffix>, a two-point
 * butterfly.
 *
 *   ret    - tokens placed in front of the function name (its return type;
 *            the generated body contains no return statement).
 *   suffix - pasted onto "aom_ifft1d_2_" to form the function name.
 *   T      - element type of the input and output pointers.
 *   T_VEC  - type of the values produced by load and handed to store; the
 *            built-in + and - operators are applied to it directly, so it
 *            must support them.
 *   load   - load(ptr) reads one T_VEC from ptr.
 *   store  - store(ptr, value) writes one T_VEC to ptr.
 *
 * The generated function reads the values at input and input + stride, then
 * stores their sum at output and their difference (first minus second) at
 * output + stride. Both loads happen before either store.
 */
#define GEN_IFFT_2(ret, suffix, T, T_VEC, load, store)               \
  ret aom_ifft1d_2_##suffix(const T *input, T *output, int stride) { \
    const T_VEC first = load(input + 0 * stride);                    \
    const T_VEC second = load(input + 1 * stride);                   \
    const T_VEC total = first + second;                              \
    const T_VEC delta = first - second;                              \
    store(output + 0 * stride, total);                               \
    store(output + 1 * stride, delta);                               \
  }
521*77c1e3ccSAndroid Build Coastguard Worker 
/*
 * GEN_IFFT_4 expands to the definition of aom_ifft1d_4_<suffix>, a four-point
 * inverse transform built only from additions and subtractions.
 *
 *   ret      - tokens placed in front of the function name (its return type;
 *              the generated body contains no return statement).
 *   suffix   - pasted onto "aom_ifft1d_4_" to form the function name.
 *   T        - element type of the input and output pointers.
 *   T_VEC    - type of the values produced by load and handed to store.
 *   load     - load(ptr) reads one T_VEC from ptr.
 *   store    - store(ptr, value) writes one T_VEC to ptr.
 *   constant - constant(x) builds a T_VEC from the float literal x.
 *   add, sub - binary T_VEC addition and subtraction.
 *
 * With i0..i3 read from input + k * stride (all four loads happen before any
 * store), the generated function writes:
 *   output[0 * stride] = (i0 + i2) + 2 * i1
 *   output[1 * stride] = (i0 - i2) - 2 * i3
 *   output[2 * stride] = (i0 + i2) - 2 * i1
 *   output[3 * stride] = (i0 - i2) + 2 * i3
 * The result is not divided by the transform size here.
 *
 * NOTE(review): this arithmetic is consistent with an unnormalized inverse
 * DFT of a conjugate-symmetric spectrum packed as [r0, r1, r2, i1]; confirm
 * against the packing described at the top of this header.
 *
 * Only the terms that reach an output are computed; values that would always
 * be discarded (i1 - i1 and i3 - i3) are not evaluated.
 */
#define GEN_IFFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) \
  ret aom_ifft1d_4_##suffix(const T *input, T *output, int stride) {       \
    const T_VEC kWeight0 = constant(0.0f);                                 \
    const T_VEC i0 = load(input + 0 * stride);                             \
    const T_VEC i1 = load(input + 1 * stride);                             \
    const T_VEC i2 = load(input + 2 * stride);                             \
    const T_VEC i3 = load(input + 3 * stride);                             \
    /* Butterfly on inputs 0 and 2. */                                     \
    const T_VEC even_sum = add(i0, i2);                                    \
    const T_VEC even_diff = sub(i0, i2);                                   \
    /* 2 * i1 and -2 * i3, formed as (i1 + i1) and ((0 - i3) - i3). */     \
    const T_VEC twice_i1 = add(i1, i1);                                    \
    const T_VEC neg_twice_i3 = sub(sub(kWeight0, i3), i3);                 \
    store(output + 0 * stride, add(even_sum, twice_i1));                   \
    store(output + 1 * stride, add(even_diff, neg_twice_i3));              \
    store(output + 2 * stride, sub(even_sum, twice_i1));                   \
    store(output + 3 * stride, sub(even_diff, neg_twice_i3));              \
  }
538*77c1e3ccSAndroid Build Coastguard Worker 
539*77c1e3ccSAndroid Build Coastguard Worker #define GEN_IFFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
540*77c1e3ccSAndroid Build Coastguard Worker                    mul)                                                    \
541*77c1e3ccSAndroid Build Coastguard Worker   ret aom_ifft1d_8_##suffix(const T *input, T *output, int stride) {       \
542*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight0 = constant(0.0f);                                 \
543*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight2 = constant(0.707107f);                            \
544*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i0 = load(input + 0 * stride);                             \
545*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i1 = load(input + 1 * stride);                             \
546*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i2 = load(input + 2 * stride);                             \
547*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i3 = load(input + 3 * stride);                             \
548*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i4 = load(input + 4 * stride);                             \
549*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i5 = load(input + 5 * stride);                             \
550*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i6 = load(input + 6 * stride);                             \
551*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i7 = load(input + 7 * stride);                             \
552*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w6 = add(i0, i4);                                          \
553*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w7 = sub(i0, i4);                                          \
554*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w8[2] = { add(i2, i2), sub(i6, i6) };                      \
555*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w9[2] = { sub(i2, i2), sub(sub(kWeight0, i6), i6) };       \
556*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w10[2] = { add(w6, w8[0]), w8[1] };                        \
557*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w11[2] = { sub(w6, w8[0]), sub(kWeight0, w8[1]) };         \
558*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w12[2] = { add(w7, w9[1]), sub(kWeight0, w9[0]) };         \
559*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w13[2] = { sub(w7, w9[1]), w9[0] };                        \
560*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w14[2] = { add(i1, i3), sub(i7, i5) };                     \
561*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w15[2] = { sub(i1, i3), sub(sub(kWeight0, i5), i7) };      \
562*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w16[2] = { add(i3, i1), sub(i5, i7) };                     \
563*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w17[2] = { sub(i3, i1), sub(sub(kWeight0, i7), i5) };      \
564*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w18[2] = { add(w14[0], w16[0]), add(w14[1], w16[1]) };     \
565*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w19[2] = { sub(w14[0], w16[0]), sub(w14[1], w16[1]) };     \
566*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w20[2] = { add(w15[0], w17[1]), sub(w15[1], w17[0]) };     \
567*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w21[2] = { sub(w15[0], w17[1]), add(w15[1], w17[0]) };     \
568*77c1e3ccSAndroid Build Coastguard Worker     store(output + 0 * stride, add(w10[0], w18[0]));                       \
569*77c1e3ccSAndroid Build Coastguard Worker     store(output + 1 * stride,                                             \
570*77c1e3ccSAndroid Build Coastguard Worker           add(w12[0], mul(kWeight2, add(w20[0], w20[1]))));                \
571*77c1e3ccSAndroid Build Coastguard Worker     store(output + 2 * stride, add(w11[0], w19[1]));                       \
572*77c1e3ccSAndroid Build Coastguard Worker     store(output + 3 * stride,                                             \
573*77c1e3ccSAndroid Build Coastguard Worker           sub(w13[0], mul(kWeight2, sub(w21[0], w21[1]))));                \
574*77c1e3ccSAndroid Build Coastguard Worker     store(output + 4 * stride, sub(w10[0], w18[0]));                       \
575*77c1e3ccSAndroid Build Coastguard Worker     store(output + 5 * stride,                                             \
576*77c1e3ccSAndroid Build Coastguard Worker           add(w12[0], sub(sub(kWeight0, mul(kWeight2, w20[0])),            \
577*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight2, w20[1]))));                        \
578*77c1e3ccSAndroid Build Coastguard Worker     store(output + 6 * stride, sub(w11[0], w19[1]));                       \
579*77c1e3ccSAndroid Build Coastguard Worker     store(output + 7 * stride,                                             \
580*77c1e3ccSAndroid Build Coastguard Worker           add(w13[0], mul(kWeight2, sub(w21[0], w21[1]))));                \
581*77c1e3ccSAndroid Build Coastguard Worker   }
582*77c1e3ccSAndroid Build Coastguard Worker 
/*!\brief Generates a 16-point 1d inverse FFT function.
 *
 * Expands to the definition of aom_ifft1d_16_<suffix>(input, output, stride).
 * The generated function reads 16 values from input + k * stride (k = 0..15)
 * through `load`, combines them using only the supplied `add`, `sub` and `mul`
 * operations, and writes 16 results to output + k * stride through `store`.
 *
 * \param ret       Tokens placed in front of the function name. The body has
 *                  no return statement, so this is expected to be a void
 *                  return type (optionally with storage/inline specifiers).
 * \param suffix    Pasted onto aom_ifft1d_16_ to form the function name.
 * \param T         Element type of the input and output pointers.
 * \param T_VEC     Type of every loaded value, constant and temporary.
 * \param load      Called as load(const T *); result is stored in a T_VEC.
 * \param store     Called as store(T *, T_VEC).
 * \param constant  Called with a float literal; result is stored in a T_VEC.
 * \param add       Binary addition on T_VEC values.
 * \param sub       Binary subtraction on T_VEC values.
 * \param mul       Binary multiplication on T_VEC values.
 *
 * Implementation notes:
 *  - No negate operation is passed in, so negation is written as
 *    sub(kWeight0, x) with kWeight0 = constant(0.0f).
 *  - The only multiplications are by kWeight2, kWeight3 and kWeight4, whose
 *    values numerically match cos(pi/4), cos(pi/8) and sin(pi/8).
 *  - Terms such as add(i4, i4) or sub(i12, i12) are written out rather than
 *    folded. The grouping of add/sub/mul calls fixes the floating-point
 *    evaluation order, so do not simplify it without re-validating the output.
 *  - NOTE(review): the two-element w*[2] temporaries appear to hold
 *    (real, imaginary) pairs, with inputs 0..8 feeding the first component
 *    and inputs 9..15 the second. Confirm against the layout description at
 *    the top of this file.
 *  - NOTE(review): no 1/16 scaling is visible in this function; presumably
 *    any normalization is the caller's responsibility. Confirm.
 */
583*77c1e3ccSAndroid Build Coastguard Worker #define GEN_IFFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub,   \
584*77c1e3ccSAndroid Build Coastguard Worker                     mul)                                                      \
585*77c1e3ccSAndroid Build Coastguard Worker   ret aom_ifft1d_16_##suffix(const T *input, T *output, int stride) {         \
586*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight0 = constant(0.0f);                                    \
587*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight2 = constant(0.707107f);                               \
588*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight3 = constant(0.92388f);                                \
589*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight4 = constant(0.382683f);                               \
590*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i0 = load(input + 0 * stride);                                \
591*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i1 = load(input + 1 * stride);                                \
592*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i2 = load(input + 2 * stride);                                \
593*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i3 = load(input + 3 * stride);                                \
594*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i4 = load(input + 4 * stride);                                \
595*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i5 = load(input + 5 * stride);                                \
596*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i6 = load(input + 6 * stride);                                \
597*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i7 = load(input + 7 * stride);                                \
598*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i8 = load(input + 8 * stride);                                \
599*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i9 = load(input + 9 * stride);                                \
600*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i10 = load(input + 10 * stride);                              \
601*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i11 = load(input + 11 * stride);                              \
602*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i12 = load(input + 12 * stride);                              \
603*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i13 = load(input + 13 * stride);                              \
604*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i14 = load(input + 14 * stride);                              \
605*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i15 = load(input + 15 * stride);                              \
606*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w14 = add(i0, i8);                                            \
607*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w15 = sub(i0, i8);                                            \
608*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w16[2] = { add(i4, i4), sub(i12, i12) };                      \
609*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w17[2] = { sub(i4, i4), sub(sub(kWeight0, i12), i12) };       \
610*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w18[2] = { add(w14, w16[0]), w16[1] };                        \
611*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w19[2] = { sub(w14, w16[0]), sub(kWeight0, w16[1]) };         \
612*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w20[2] = { add(w15, w17[1]), sub(kWeight0, w17[0]) };         \
613*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w21[2] = { sub(w15, w17[1]), w17[0] };                        \
614*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w22[2] = { add(i2, i6), sub(i14, i10) };                      \
615*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w23[2] = { sub(i2, i6), sub(sub(kWeight0, i10), i14) };       \
616*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w24[2] = { add(i6, i2), sub(i10, i14) };                      \
617*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w25[2] = { sub(i6, i2), sub(sub(kWeight0, i14), i10) };       \
618*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w26[2] = { add(w22[0], w24[0]), add(w22[1], w24[1]) };        \
619*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w27[2] = { sub(w22[0], w24[0]), sub(w22[1], w24[1]) };        \
620*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w28[2] = { add(w23[0], w25[1]), sub(w23[1], w25[0]) };        \
621*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w29[2] = { sub(w23[0], w25[1]), add(w23[1], w25[0]) };        \
622*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w30[2] = { add(w18[0], w26[0]), add(w18[1], w26[1]) };        \
623*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w31[2] = { sub(w18[0], w26[0]), sub(w18[1], w26[1]) };        \
624*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w32[2] = { add(w20[0], mul(kWeight2, add(w28[0], w28[1]))),   \
625*77c1e3ccSAndroid Build Coastguard Worker                            add(w20[1], mul(kWeight2, sub(w28[1], w28[0]))) }; \
626*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w33[2] = { add(w20[0],                                        \
627*77c1e3ccSAndroid Build Coastguard Worker                                sub(sub(kWeight0, mul(kWeight2, w28[0])),      \
628*77c1e3ccSAndroid Build Coastguard Worker                                    mul(kWeight2, w28[1]))),                   \
629*77c1e3ccSAndroid Build Coastguard Worker                            add(w20[1], mul(kWeight2, sub(w28[0], w28[1]))) }; \
630*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w34[2] = { add(w19[0], w27[1]), sub(w19[1], w27[0]) };        \
631*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w35[2] = { sub(w19[0], w27[1]), add(w19[1], w27[0]) };        \
632*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w36[2] = { sub(w21[0], mul(kWeight2, sub(w29[0], w29[1]))),   \
633*77c1e3ccSAndroid Build Coastguard Worker                            sub(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \
634*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w37[2] = { add(w21[0], mul(kWeight2, sub(w29[0], w29[1]))),   \
635*77c1e3ccSAndroid Build Coastguard Worker                            add(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \
636*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w38[2] = { add(i1, i7), sub(i15, i9) };                       \
637*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w39[2] = { sub(i1, i7), sub(sub(kWeight0, i9), i15) };        \
638*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w40[2] = { add(i5, i3), sub(i11, i13) };                      \
639*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w41[2] = { sub(i5, i3), sub(sub(kWeight0, i13), i11) };       \
640*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) };        \
641*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) };        \
642*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) };        \
643*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) };        \
644*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w46[2] = { add(i3, i5), sub(i13, i11) };                      \
645*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w47[2] = { sub(i3, i5), sub(sub(kWeight0, i11), i13) };       \
646*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w48[2] = { add(i7, i1), sub(i9, i15) };                       \
647*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w49[2] = { sub(i7, i1), sub(sub(kWeight0, i15), i9) };        \
648*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w50[2] = { add(w46[0], w48[0]), add(w46[1], w48[1]) };        \
649*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w51[2] = { sub(w46[0], w48[0]), sub(w46[1], w48[1]) };        \
650*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w52[2] = { add(w47[0], w49[1]), sub(w47[1], w49[0]) };        \
651*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w53[2] = { sub(w47[0], w49[1]), add(w47[1], w49[0]) };        \
652*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w54[2] = { add(w42[0], w50[0]), add(w42[1], w50[1]) };        \
653*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w55[2] = { sub(w42[0], w50[0]), sub(w42[1], w50[1]) };        \
654*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w56[2] = { add(w44[0], mul(kWeight2, add(w52[0], w52[1]))),   \
655*77c1e3ccSAndroid Build Coastguard Worker                            add(w44[1], mul(kWeight2, sub(w52[1], w52[0]))) }; \
656*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w57[2] = { add(w44[0],                                        \
657*77c1e3ccSAndroid Build Coastguard Worker                                sub(sub(kWeight0, mul(kWeight2, w52[0])),      \
658*77c1e3ccSAndroid Build Coastguard Worker                                    mul(kWeight2, w52[1]))),                   \
659*77c1e3ccSAndroid Build Coastguard Worker                            add(w44[1], mul(kWeight2, sub(w52[0], w52[1]))) }; \
660*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w58[2] = { add(w43[0], w51[1]), sub(w43[1], w51[0]) };        \
661*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w59[2] = { sub(w43[0], w51[1]), add(w43[1], w51[0]) };        \
662*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w60[2] = { sub(w45[0], mul(kWeight2, sub(w53[0], w53[1]))),   \
663*77c1e3ccSAndroid Build Coastguard Worker                            sub(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \
664*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w61[2] = { add(w45[0], mul(kWeight2, sub(w53[0], w53[1]))),   \
665*77c1e3ccSAndroid Build Coastguard Worker                            add(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \
666*77c1e3ccSAndroid Build Coastguard Worker     store(output + 0 * stride, add(w30[0], w54[0]));                          \
667*77c1e3ccSAndroid Build Coastguard Worker     store(output + 1 * stride,                                                \
668*77c1e3ccSAndroid Build Coastguard Worker           add(w32[0], add(mul(kWeight3, w56[0]), mul(kWeight4, w56[1]))));    \
669*77c1e3ccSAndroid Build Coastguard Worker     store(output + 2 * stride,                                                \
670*77c1e3ccSAndroid Build Coastguard Worker           add(w34[0], mul(kWeight2, add(w58[0], w58[1]))));                   \
671*77c1e3ccSAndroid Build Coastguard Worker     store(output + 3 * stride,                                                \
672*77c1e3ccSAndroid Build Coastguard Worker           add(w36[0], add(mul(kWeight4, w60[0]), mul(kWeight3, w60[1]))));    \
673*77c1e3ccSAndroid Build Coastguard Worker     store(output + 4 * stride, add(w31[0], w55[1]));                          \
674*77c1e3ccSAndroid Build Coastguard Worker     store(output + 5 * stride,                                                \
675*77c1e3ccSAndroid Build Coastguard Worker           sub(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1]))));    \
676*77c1e3ccSAndroid Build Coastguard Worker     store(output + 6 * stride,                                                \
677*77c1e3ccSAndroid Build Coastguard Worker           sub(w35[0], mul(kWeight2, sub(w59[0], w59[1]))));                   \
678*77c1e3ccSAndroid Build Coastguard Worker     store(output + 7 * stride,                                                \
679*77c1e3ccSAndroid Build Coastguard Worker           sub(w37[0], sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1]))));    \
680*77c1e3ccSAndroid Build Coastguard Worker     store(output + 8 * stride, sub(w30[0], w54[0]));                          \
681*77c1e3ccSAndroid Build Coastguard Worker     store(output + 9 * stride,                                                \
682*77c1e3ccSAndroid Build Coastguard Worker           add(w32[0], sub(sub(kWeight0, mul(kWeight3, w56[0])),               \
683*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight4, w56[1]))));                           \
684*77c1e3ccSAndroid Build Coastguard Worker     store(output + 10 * stride,                                               \
685*77c1e3ccSAndroid Build Coastguard Worker           add(w34[0], sub(sub(kWeight0, mul(kWeight2, w58[0])),               \
686*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight2, w58[1]))));                           \
687*77c1e3ccSAndroid Build Coastguard Worker     store(output + 11 * stride,                                               \
688*77c1e3ccSAndroid Build Coastguard Worker           add(w36[0], sub(sub(kWeight0, mul(kWeight4, w60[0])),               \
689*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight3, w60[1]))));                           \
690*77c1e3ccSAndroid Build Coastguard Worker     store(output + 12 * stride, sub(w31[0], w55[1]));                         \
691*77c1e3ccSAndroid Build Coastguard Worker     store(output + 13 * stride,                                               \
692*77c1e3ccSAndroid Build Coastguard Worker           add(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1]))));    \
693*77c1e3ccSAndroid Build Coastguard Worker     store(output + 14 * stride,                                               \
694*77c1e3ccSAndroid Build Coastguard Worker           add(w35[0], mul(kWeight2, sub(w59[0], w59[1]))));                   \
695*77c1e3ccSAndroid Build Coastguard Worker     store(output + 15 * stride,                                               \
696*77c1e3ccSAndroid Build Coastguard Worker           add(w37[0], sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1]))));    \
697*77c1e3ccSAndroid Build Coastguard Worker   }
698*77c1e3ccSAndroid Build Coastguard Worker #define GEN_IFFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub,    \
699*77c1e3ccSAndroid Build Coastguard Worker                     mul)                                                       \
700*77c1e3ccSAndroid Build Coastguard Worker   ret aom_ifft1d_32_##suffix(const T *input, T *output, int stride) {          \
701*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight0 = constant(0.0f);                                     \
702*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight2 = constant(0.707107f);                                \
703*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight3 = constant(0.92388f);                                 \
704*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight4 = constant(0.382683f);                                \
705*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight5 = constant(0.980785f);                                \
706*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight6 = constant(0.19509f);                                 \
707*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight7 = constant(0.83147f);                                 \
708*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC kWeight8 = constant(0.55557f);                                 \
709*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i0 = load(input + 0 * stride);                                 \
710*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i1 = load(input + 1 * stride);                                 \
711*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i2 = load(input + 2 * stride);                                 \
712*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i3 = load(input + 3 * stride);                                 \
713*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i4 = load(input + 4 * stride);                                 \
714*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i5 = load(input + 5 * stride);                                 \
715*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i6 = load(input + 6 * stride);                                 \
716*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i7 = load(input + 7 * stride);                                 \
717*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i8 = load(input + 8 * stride);                                 \
718*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i9 = load(input + 9 * stride);                                 \
719*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i10 = load(input + 10 * stride);                               \
720*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i11 = load(input + 11 * stride);                               \
721*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i12 = load(input + 12 * stride);                               \
722*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i13 = load(input + 13 * stride);                               \
723*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i14 = load(input + 14 * stride);                               \
724*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i15 = load(input + 15 * stride);                               \
725*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i16 = load(input + 16 * stride);                               \
726*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i17 = load(input + 17 * stride);                               \
727*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i18 = load(input + 18 * stride);                               \
728*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i19 = load(input + 19 * stride);                               \
729*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i20 = load(input + 20 * stride);                               \
730*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i21 = load(input + 21 * stride);                               \
731*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i22 = load(input + 22 * stride);                               \
732*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i23 = load(input + 23 * stride);                               \
733*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i24 = load(input + 24 * stride);                               \
734*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i25 = load(input + 25 * stride);                               \
735*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i26 = load(input + 26 * stride);                               \
736*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i27 = load(input + 27 * stride);                               \
737*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i28 = load(input + 28 * stride);                               \
738*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i29 = load(input + 29 * stride);                               \
739*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i30 = load(input + 30 * stride);                               \
740*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC i31 = load(input + 31 * stride);                               \
741*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w30 = add(i0, i16);                                            \
742*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w31 = sub(i0, i16);                                            \
743*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w32[2] = { add(i8, i8), sub(i24, i24) };                       \
744*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w33[2] = { sub(i8, i8), sub(sub(kWeight0, i24), i24) };        \
745*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w34[2] = { add(w30, w32[0]), w32[1] };                         \
746*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w35[2] = { sub(w30, w32[0]), sub(kWeight0, w32[1]) };          \
747*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w36[2] = { add(w31, w33[1]), sub(kWeight0, w33[0]) };          \
748*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w37[2] = { sub(w31, w33[1]), w33[0] };                         \
749*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w38[2] = { add(i4, i12), sub(i28, i20) };                      \
750*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w39[2] = { sub(i4, i12), sub(sub(kWeight0, i20), i28) };       \
751*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w40[2] = { add(i12, i4), sub(i20, i28) };                      \
752*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w41[2] = { sub(i12, i4), sub(sub(kWeight0, i28), i20) };       \
753*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) };         \
754*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) };         \
755*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) };         \
756*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) };         \
757*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w46[2] = { add(w34[0], w42[0]), add(w34[1], w42[1]) };         \
758*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w47[2] = { sub(w34[0], w42[0]), sub(w34[1], w42[1]) };         \
759*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w48[2] = { add(w36[0], mul(kWeight2, add(w44[0], w44[1]))),    \
760*77c1e3ccSAndroid Build Coastguard Worker                            add(w36[1], mul(kWeight2, sub(w44[1], w44[0]))) };  \
761*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w49[2] = { add(w36[0],                                         \
762*77c1e3ccSAndroid Build Coastguard Worker                                sub(sub(kWeight0, mul(kWeight2, w44[0])),       \
763*77c1e3ccSAndroid Build Coastguard Worker                                    mul(kWeight2, w44[1]))),                    \
764*77c1e3ccSAndroid Build Coastguard Worker                            add(w36[1], mul(kWeight2, sub(w44[0], w44[1]))) };  \
765*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w50[2] = { add(w35[0], w43[1]), sub(w35[1], w43[0]) };         \
766*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w51[2] = { sub(w35[0], w43[1]), add(w35[1], w43[0]) };         \
767*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w52[2] = { sub(w37[0], mul(kWeight2, sub(w45[0], w45[1]))),    \
768*77c1e3ccSAndroid Build Coastguard Worker                            sub(w37[1], mul(kWeight2, add(w45[1], w45[0]))) };  \
769*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w53[2] = { add(w37[0], mul(kWeight2, sub(w45[0], w45[1]))),    \
770*77c1e3ccSAndroid Build Coastguard Worker                            add(w37[1], mul(kWeight2, add(w45[1], w45[0]))) };  \
771*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w54[2] = { add(i2, i14), sub(i30, i18) };                      \
772*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w55[2] = { sub(i2, i14), sub(sub(kWeight0, i18), i30) };       \
773*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w56[2] = { add(i10, i6), sub(i22, i26) };                      \
774*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w57[2] = { sub(i10, i6), sub(sub(kWeight0, i26), i22) };       \
775*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w58[2] = { add(w54[0], w56[0]), add(w54[1], w56[1]) };         \
776*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w59[2] = { sub(w54[0], w56[0]), sub(w54[1], w56[1]) };         \
777*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w60[2] = { add(w55[0], w57[1]), sub(w55[1], w57[0]) };         \
778*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w61[2] = { sub(w55[0], w57[1]), add(w55[1], w57[0]) };         \
779*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w62[2] = { add(i6, i10), sub(i26, i22) };                      \
780*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w63[2] = { sub(i6, i10), sub(sub(kWeight0, i22), i26) };       \
781*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w64[2] = { add(i14, i2), sub(i18, i30) };                      \
782*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w65[2] = { sub(i14, i2), sub(sub(kWeight0, i30), i18) };       \
783*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w66[2] = { add(w62[0], w64[0]), add(w62[1], w64[1]) };         \
784*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w67[2] = { sub(w62[0], w64[0]), sub(w62[1], w64[1]) };         \
785*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w68[2] = { add(w63[0], w65[1]), sub(w63[1], w65[0]) };         \
786*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w69[2] = { sub(w63[0], w65[1]), add(w63[1], w65[0]) };         \
787*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w70[2] = { add(w58[0], w66[0]), add(w58[1], w66[1]) };         \
788*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w71[2] = { sub(w58[0], w66[0]), sub(w58[1], w66[1]) };         \
789*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w72[2] = { add(w60[0], mul(kWeight2, add(w68[0], w68[1]))),    \
790*77c1e3ccSAndroid Build Coastguard Worker                            add(w60[1], mul(kWeight2, sub(w68[1], w68[0]))) };  \
791*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w73[2] = { add(w60[0],                                         \
792*77c1e3ccSAndroid Build Coastguard Worker                                sub(sub(kWeight0, mul(kWeight2, w68[0])),       \
793*77c1e3ccSAndroid Build Coastguard Worker                                    mul(kWeight2, w68[1]))),                    \
794*77c1e3ccSAndroid Build Coastguard Worker                            add(w60[1], mul(kWeight2, sub(w68[0], w68[1]))) };  \
795*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w74[2] = { add(w59[0], w67[1]), sub(w59[1], w67[0]) };         \
796*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w75[2] = { sub(w59[0], w67[1]), add(w59[1], w67[0]) };         \
797*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w76[2] = { sub(w61[0], mul(kWeight2, sub(w69[0], w69[1]))),    \
798*77c1e3ccSAndroid Build Coastguard Worker                            sub(w61[1], mul(kWeight2, add(w69[1], w69[0]))) };  \
799*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w77[2] = { add(w61[0], mul(kWeight2, sub(w69[0], w69[1]))),    \
800*77c1e3ccSAndroid Build Coastguard Worker                            add(w61[1], mul(kWeight2, add(w69[1], w69[0]))) };  \
801*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w78[2] = { add(w46[0], w70[0]), add(w46[1], w70[1]) };         \
802*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w79[2] = { sub(w46[0], w70[0]), sub(w46[1], w70[1]) };         \
803*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w80[2] = {                                                     \
804*77c1e3ccSAndroid Build Coastguard Worker       add(w48[0], add(mul(kWeight3, w72[0]), mul(kWeight4, w72[1]))),          \
805*77c1e3ccSAndroid Build Coastguard Worker       add(w48[1], sub(mul(kWeight3, w72[1]), mul(kWeight4, w72[0])))           \
806*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
807*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w81[2] = {                                                     \
808*77c1e3ccSAndroid Build Coastguard Worker       add(w48[0],                                                              \
809*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w72[0])), mul(kWeight4, w72[1]))),   \
810*77c1e3ccSAndroid Build Coastguard Worker       add(w48[1], sub(mul(kWeight4, w72[0]), mul(kWeight3, w72[1])))           \
811*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
812*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w82[2] = { add(w50[0], mul(kWeight2, add(w74[0], w74[1]))),    \
813*77c1e3ccSAndroid Build Coastguard Worker                            add(w50[1], mul(kWeight2, sub(w74[1], w74[0]))) };  \
814*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w83[2] = { add(w50[0],                                         \
815*77c1e3ccSAndroid Build Coastguard Worker                                sub(sub(kWeight0, mul(kWeight2, w74[0])),       \
816*77c1e3ccSAndroid Build Coastguard Worker                                    mul(kWeight2, w74[1]))),                    \
817*77c1e3ccSAndroid Build Coastguard Worker                            add(w50[1], mul(kWeight2, sub(w74[0], w74[1]))) };  \
818*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w84[2] = {                                                     \
819*77c1e3ccSAndroid Build Coastguard Worker       add(w52[0], add(mul(kWeight4, w76[0]), mul(kWeight3, w76[1]))),          \
820*77c1e3ccSAndroid Build Coastguard Worker       add(w52[1], sub(mul(kWeight4, w76[1]), mul(kWeight3, w76[0])))           \
821*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
822*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w85[2] = {                                                     \
823*77c1e3ccSAndroid Build Coastguard Worker       add(w52[0],                                                              \
824*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w76[0])), mul(kWeight3, w76[1]))),   \
825*77c1e3ccSAndroid Build Coastguard Worker       add(w52[1], sub(mul(kWeight3, w76[0]), mul(kWeight4, w76[1])))           \
826*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
827*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w86[2] = { add(w47[0], w71[1]), sub(w47[1], w71[0]) };         \
828*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w87[2] = { sub(w47[0], w71[1]), add(w47[1], w71[0]) };         \
829*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w88[2] = {                                                     \
830*77c1e3ccSAndroid Build Coastguard Worker       sub(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))),          \
831*77c1e3ccSAndroid Build Coastguard Worker       add(w49[1],                                                              \
832*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w73[1])), mul(kWeight3, w73[0])))    \
833*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
834*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w89[2] = {                                                     \
835*77c1e3ccSAndroid Build Coastguard Worker       add(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))),          \
836*77c1e3ccSAndroid Build Coastguard Worker       add(w49[1], add(mul(kWeight4, w73[1]), mul(kWeight3, w73[0])))           \
837*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
838*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w90[2] = { sub(w51[0], mul(kWeight2, sub(w75[0], w75[1]))),    \
839*77c1e3ccSAndroid Build Coastguard Worker                            sub(w51[1], mul(kWeight2, add(w75[1], w75[0]))) };  \
840*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w91[2] = { add(w51[0], mul(kWeight2, sub(w75[0], w75[1]))),    \
841*77c1e3ccSAndroid Build Coastguard Worker                            add(w51[1], mul(kWeight2, add(w75[1], w75[0]))) };  \
842*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w92[2] = {                                                     \
843*77c1e3ccSAndroid Build Coastguard Worker       sub(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))),          \
844*77c1e3ccSAndroid Build Coastguard Worker       add(w53[1],                                                              \
845*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w77[1])), mul(kWeight4, w77[0])))    \
846*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
847*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w93[2] = {                                                     \
848*77c1e3ccSAndroid Build Coastguard Worker       add(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))),          \
849*77c1e3ccSAndroid Build Coastguard Worker       add(w53[1], add(mul(kWeight3, w77[1]), mul(kWeight4, w77[0])))           \
850*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
851*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w94[2] = { add(i1, i15), sub(i31, i17) };                      \
852*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w95[2] = { sub(i1, i15), sub(sub(kWeight0, i17), i31) };       \
853*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w96[2] = { add(i9, i7), sub(i23, i25) };                       \
854*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w97[2] = { sub(i9, i7), sub(sub(kWeight0, i25), i23) };        \
855*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w98[2] = { add(w94[0], w96[0]), add(w94[1], w96[1]) };         \
856*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w99[2] = { sub(w94[0], w96[0]), sub(w94[1], w96[1]) };         \
857*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w100[2] = { add(w95[0], w97[1]), sub(w95[1], w97[0]) };        \
858*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w101[2] = { sub(w95[0], w97[1]), add(w95[1], w97[0]) };        \
859*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w102[2] = { add(i5, i11), sub(i27, i21) };                     \
860*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w103[2] = { sub(i5, i11), sub(sub(kWeight0, i21), i27) };      \
861*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w104[2] = { add(i13, i3), sub(i19, i29) };                     \
862*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w105[2] = { sub(i13, i3), sub(sub(kWeight0, i29), i19) };      \
863*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w106[2] = { add(w102[0], w104[0]), add(w102[1], w104[1]) };    \
864*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w107[2] = { sub(w102[0], w104[0]), sub(w102[1], w104[1]) };    \
865*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w108[2] = { add(w103[0], w105[1]), sub(w103[1], w105[0]) };    \
866*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w109[2] = { sub(w103[0], w105[1]), add(w103[1], w105[0]) };    \
867*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w110[2] = { add(w98[0], w106[0]), add(w98[1], w106[1]) };      \
868*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w111[2] = { sub(w98[0], w106[0]), sub(w98[1], w106[1]) };      \
869*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w112[2] = {                                                    \
870*77c1e3ccSAndroid Build Coastguard Worker       add(w100[0], mul(kWeight2, add(w108[0], w108[1]))),                      \
871*77c1e3ccSAndroid Build Coastguard Worker       add(w100[1], mul(kWeight2, sub(w108[1], w108[0])))                       \
872*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
873*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w113[2] = {                                                    \
874*77c1e3ccSAndroid Build Coastguard Worker       add(w100[0],                                                             \
875*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight2, w108[0])), mul(kWeight2, w108[1]))), \
876*77c1e3ccSAndroid Build Coastguard Worker       add(w100[1], mul(kWeight2, sub(w108[0], w108[1])))                       \
877*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
878*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w114[2] = { add(w99[0], w107[1]), sub(w99[1], w107[0]) };      \
879*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w115[2] = { sub(w99[0], w107[1]), add(w99[1], w107[0]) };      \
880*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w116[2] = {                                                    \
881*77c1e3ccSAndroid Build Coastguard Worker       sub(w101[0], mul(kWeight2, sub(w109[0], w109[1]))),                      \
882*77c1e3ccSAndroid Build Coastguard Worker       sub(w101[1], mul(kWeight2, add(w109[1], w109[0])))                       \
883*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
884*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w117[2] = {                                                    \
885*77c1e3ccSAndroid Build Coastguard Worker       add(w101[0], mul(kWeight2, sub(w109[0], w109[1]))),                      \
886*77c1e3ccSAndroid Build Coastguard Worker       add(w101[1], mul(kWeight2, add(w109[1], w109[0])))                       \
887*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
888*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w118[2] = { add(i3, i13), sub(i29, i19) };                     \
889*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w119[2] = { sub(i3, i13), sub(sub(kWeight0, i19), i29) };      \
890*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w120[2] = { add(i11, i5), sub(i21, i27) };                     \
891*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w121[2] = { sub(i11, i5), sub(sub(kWeight0, i27), i21) };      \
892*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w122[2] = { add(w118[0], w120[0]), add(w118[1], w120[1]) };    \
893*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w123[2] = { sub(w118[0], w120[0]), sub(w118[1], w120[1]) };    \
894*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w124[2] = { add(w119[0], w121[1]), sub(w119[1], w121[0]) };    \
895*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w125[2] = { sub(w119[0], w121[1]), add(w119[1], w121[0]) };    \
896*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w126[2] = { add(i7, i9), sub(i25, i23) };                      \
897*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w127[2] = { sub(i7, i9), sub(sub(kWeight0, i23), i25) };       \
898*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w128[2] = { add(i15, i1), sub(i17, i31) };                     \
899*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w129[2] = { sub(i15, i1), sub(sub(kWeight0, i31), i17) };      \
900*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w130[2] = { add(w126[0], w128[0]), add(w126[1], w128[1]) };    \
901*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w131[2] = { sub(w126[0], w128[0]), sub(w126[1], w128[1]) };    \
902*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w132[2] = { add(w127[0], w129[1]), sub(w127[1], w129[0]) };    \
903*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w133[2] = { sub(w127[0], w129[1]), add(w127[1], w129[0]) };    \
904*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w134[2] = { add(w122[0], w130[0]), add(w122[1], w130[1]) };    \
905*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w135[2] = { sub(w122[0], w130[0]), sub(w122[1], w130[1]) };    \
906*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w136[2] = {                                                    \
907*77c1e3ccSAndroid Build Coastguard Worker       add(w124[0], mul(kWeight2, add(w132[0], w132[1]))),                      \
908*77c1e3ccSAndroid Build Coastguard Worker       add(w124[1], mul(kWeight2, sub(w132[1], w132[0])))                       \
909*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
910*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w137[2] = {                                                    \
911*77c1e3ccSAndroid Build Coastguard Worker       add(w124[0],                                                             \
912*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight2, w132[0])), mul(kWeight2, w132[1]))), \
913*77c1e3ccSAndroid Build Coastguard Worker       add(w124[1], mul(kWeight2, sub(w132[0], w132[1])))                       \
914*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
915*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w138[2] = { add(w123[0], w131[1]), sub(w123[1], w131[0]) };    \
916*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w139[2] = { sub(w123[0], w131[1]), add(w123[1], w131[0]) };    \
917*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w140[2] = {                                                    \
918*77c1e3ccSAndroid Build Coastguard Worker       sub(w125[0], mul(kWeight2, sub(w133[0], w133[1]))),                      \
919*77c1e3ccSAndroid Build Coastguard Worker       sub(w125[1], mul(kWeight2, add(w133[1], w133[0])))                       \
920*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
921*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w141[2] = {                                                    \
922*77c1e3ccSAndroid Build Coastguard Worker       add(w125[0], mul(kWeight2, sub(w133[0], w133[1]))),                      \
923*77c1e3ccSAndroid Build Coastguard Worker       add(w125[1], mul(kWeight2, add(w133[1], w133[0])))                       \
924*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
925*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w142[2] = { add(w110[0], w134[0]), add(w110[1], w134[1]) };    \
926*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w143[2] = { sub(w110[0], w134[0]), sub(w110[1], w134[1]) };    \
927*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w144[2] = {                                                    \
928*77c1e3ccSAndroid Build Coastguard Worker       add(w112[0], add(mul(kWeight3, w136[0]), mul(kWeight4, w136[1]))),       \
929*77c1e3ccSAndroid Build Coastguard Worker       add(w112[1], sub(mul(kWeight3, w136[1]), mul(kWeight4, w136[0])))        \
930*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
931*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w145[2] = {                                                    \
932*77c1e3ccSAndroid Build Coastguard Worker       add(w112[0],                                                             \
933*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w136[0])), mul(kWeight4, w136[1]))), \
934*77c1e3ccSAndroid Build Coastguard Worker       add(w112[1], sub(mul(kWeight4, w136[0]), mul(kWeight3, w136[1])))        \
935*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
936*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w146[2] = {                                                    \
937*77c1e3ccSAndroid Build Coastguard Worker       add(w114[0], mul(kWeight2, add(w138[0], w138[1]))),                      \
938*77c1e3ccSAndroid Build Coastguard Worker       add(w114[1], mul(kWeight2, sub(w138[1], w138[0])))                       \
939*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
940*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w147[2] = {                                                    \
941*77c1e3ccSAndroid Build Coastguard Worker       add(w114[0],                                                             \
942*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight2, w138[0])), mul(kWeight2, w138[1]))), \
943*77c1e3ccSAndroid Build Coastguard Worker       add(w114[1], mul(kWeight2, sub(w138[0], w138[1])))                       \
944*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
945*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w148[2] = {                                                    \
946*77c1e3ccSAndroid Build Coastguard Worker       add(w116[0], add(mul(kWeight4, w140[0]), mul(kWeight3, w140[1]))),       \
947*77c1e3ccSAndroid Build Coastguard Worker       add(w116[1], sub(mul(kWeight4, w140[1]), mul(kWeight3, w140[0])))        \
948*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
949*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w149[2] = {                                                    \
950*77c1e3ccSAndroid Build Coastguard Worker       add(w116[0],                                                             \
951*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w140[0])), mul(kWeight3, w140[1]))), \
952*77c1e3ccSAndroid Build Coastguard Worker       add(w116[1], sub(mul(kWeight3, w140[0]), mul(kWeight4, w140[1])))        \
953*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
954*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w150[2] = { add(w111[0], w135[1]), sub(w111[1], w135[0]) };    \
955*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w151[2] = { sub(w111[0], w135[1]), add(w111[1], w135[0]) };    \
956*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w152[2] = {                                                    \
957*77c1e3ccSAndroid Build Coastguard Worker       sub(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))),       \
958*77c1e3ccSAndroid Build Coastguard Worker       add(w113[1],                                                             \
959*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight4, w137[1])), mul(kWeight3, w137[0])))  \
960*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
961*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w153[2] = {                                                    \
962*77c1e3ccSAndroid Build Coastguard Worker       add(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))),       \
963*77c1e3ccSAndroid Build Coastguard Worker       add(w113[1], add(mul(kWeight4, w137[1]), mul(kWeight3, w137[0])))        \
964*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
965*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w154[2] = {                                                    \
966*77c1e3ccSAndroid Build Coastguard Worker       sub(w115[0], mul(kWeight2, sub(w139[0], w139[1]))),                      \
967*77c1e3ccSAndroid Build Coastguard Worker       sub(w115[1], mul(kWeight2, add(w139[1], w139[0])))                       \
968*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
969*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w155[2] = {                                                    \
970*77c1e3ccSAndroid Build Coastguard Worker       add(w115[0], mul(kWeight2, sub(w139[0], w139[1]))),                      \
971*77c1e3ccSAndroid Build Coastguard Worker       add(w115[1], mul(kWeight2, add(w139[1], w139[0])))                       \
972*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
973*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w156[2] = {                                                    \
974*77c1e3ccSAndroid Build Coastguard Worker       sub(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))),       \
975*77c1e3ccSAndroid Build Coastguard Worker       add(w117[1],                                                             \
976*77c1e3ccSAndroid Build Coastguard Worker           sub(sub(kWeight0, mul(kWeight3, w141[1])), mul(kWeight4, w141[0])))  \
977*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
978*77c1e3ccSAndroid Build Coastguard Worker     const T_VEC w157[2] = {                                                    \
979*77c1e3ccSAndroid Build Coastguard Worker       add(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))),       \
980*77c1e3ccSAndroid Build Coastguard Worker       add(w117[1], add(mul(kWeight3, w141[1]), mul(kWeight4, w141[0])))        \
981*77c1e3ccSAndroid Build Coastguard Worker     };                                                                         \
982*77c1e3ccSAndroid Build Coastguard Worker     store(output + 0 * stride, add(w78[0], w142[0]));                          \
983*77c1e3ccSAndroid Build Coastguard Worker     store(output + 1 * stride,                                                 \
984*77c1e3ccSAndroid Build Coastguard Worker           add(w80[0], add(mul(kWeight5, w144[0]), mul(kWeight6, w144[1]))));   \
985*77c1e3ccSAndroid Build Coastguard Worker     store(output + 2 * stride,                                                 \
986*77c1e3ccSAndroid Build Coastguard Worker           add(w82[0], add(mul(kWeight3, w146[0]), mul(kWeight4, w146[1]))));   \
987*77c1e3ccSAndroid Build Coastguard Worker     store(output + 3 * stride,                                                 \
988*77c1e3ccSAndroid Build Coastguard Worker           add(w84[0], add(mul(kWeight7, w148[0]), mul(kWeight8, w148[1]))));   \
989*77c1e3ccSAndroid Build Coastguard Worker     store(output + 4 * stride,                                                 \
990*77c1e3ccSAndroid Build Coastguard Worker           add(w86[0], mul(kWeight2, add(w150[0], w150[1]))));                  \
991*77c1e3ccSAndroid Build Coastguard Worker     store(output + 5 * stride,                                                 \
992*77c1e3ccSAndroid Build Coastguard Worker           add(w88[0], add(mul(kWeight8, w152[0]), mul(kWeight7, w152[1]))));   \
993*77c1e3ccSAndroid Build Coastguard Worker     store(output + 6 * stride,                                                 \
994*77c1e3ccSAndroid Build Coastguard Worker           add(w90[0], add(mul(kWeight4, w154[0]), mul(kWeight3, w154[1]))));   \
995*77c1e3ccSAndroid Build Coastguard Worker     store(output + 7 * stride,                                                 \
996*77c1e3ccSAndroid Build Coastguard Worker           add(w92[0], add(mul(kWeight6, w156[0]), mul(kWeight5, w156[1]))));   \
997*77c1e3ccSAndroid Build Coastguard Worker     store(output + 8 * stride, add(w79[0], w143[1]));                          \
998*77c1e3ccSAndroid Build Coastguard Worker     store(output + 9 * stride,                                                 \
999*77c1e3ccSAndroid Build Coastguard Worker           sub(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1]))));   \
1000*77c1e3ccSAndroid Build Coastguard Worker     store(output + 10 * stride,                                                \
1001*77c1e3ccSAndroid Build Coastguard Worker           sub(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1]))));   \
1002*77c1e3ccSAndroid Build Coastguard Worker     store(output + 11 * stride,                                                \
1003*77c1e3ccSAndroid Build Coastguard Worker           sub(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1]))));   \
1004*77c1e3ccSAndroid Build Coastguard Worker     store(output + 12 * stride,                                                \
1005*77c1e3ccSAndroid Build Coastguard Worker           sub(w87[0], mul(kWeight2, sub(w151[0], w151[1]))));                  \
1006*77c1e3ccSAndroid Build Coastguard Worker     store(output + 13 * stride,                                                \
1007*77c1e3ccSAndroid Build Coastguard Worker           sub(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1]))));   \
1008*77c1e3ccSAndroid Build Coastguard Worker     store(output + 14 * stride,                                                \
1009*77c1e3ccSAndroid Build Coastguard Worker           sub(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1]))));   \
1010*77c1e3ccSAndroid Build Coastguard Worker     store(output + 15 * stride,                                                \
1011*77c1e3ccSAndroid Build Coastguard Worker           sub(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1]))));   \
1012*77c1e3ccSAndroid Build Coastguard Worker     store(output + 16 * stride, sub(w78[0], w142[0]));                         \
1013*77c1e3ccSAndroid Build Coastguard Worker     store(output + 17 * stride,                                                \
1014*77c1e3ccSAndroid Build Coastguard Worker           add(w80[0], sub(sub(kWeight0, mul(kWeight5, w144[0])),               \
1015*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight6, w144[1]))));                           \
1016*77c1e3ccSAndroid Build Coastguard Worker     store(output + 18 * stride,                                                \
1017*77c1e3ccSAndroid Build Coastguard Worker           add(w82[0], sub(sub(kWeight0, mul(kWeight3, w146[0])),               \
1018*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight4, w146[1]))));                           \
1019*77c1e3ccSAndroid Build Coastguard Worker     store(output + 19 * stride,                                                \
1020*77c1e3ccSAndroid Build Coastguard Worker           add(w84[0], sub(sub(kWeight0, mul(kWeight7, w148[0])),               \
1021*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight8, w148[1]))));                           \
1022*77c1e3ccSAndroid Build Coastguard Worker     store(output + 20 * stride,                                                \
1023*77c1e3ccSAndroid Build Coastguard Worker           add(w86[0], sub(sub(kWeight0, mul(kWeight2, w150[0])),               \
1024*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight2, w150[1]))));                           \
1025*77c1e3ccSAndroid Build Coastguard Worker     store(output + 21 * stride,                                                \
1026*77c1e3ccSAndroid Build Coastguard Worker           add(w88[0], sub(sub(kWeight0, mul(kWeight8, w152[0])),               \
1027*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight7, w152[1]))));                           \
1028*77c1e3ccSAndroid Build Coastguard Worker     store(output + 22 * stride,                                                \
1029*77c1e3ccSAndroid Build Coastguard Worker           add(w90[0], sub(sub(kWeight0, mul(kWeight4, w154[0])),               \
1030*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight3, w154[1]))));                           \
1031*77c1e3ccSAndroid Build Coastguard Worker     store(output + 23 * stride,                                                \
1032*77c1e3ccSAndroid Build Coastguard Worker           add(w92[0], sub(sub(kWeight0, mul(kWeight6, w156[0])),               \
1033*77c1e3ccSAndroid Build Coastguard Worker                           mul(kWeight5, w156[1]))));                           \
1034*77c1e3ccSAndroid Build Coastguard Worker     store(output + 24 * stride, sub(w79[0], w143[1]));                         \
1035*77c1e3ccSAndroid Build Coastguard Worker     store(output + 25 * stride,                                                \
1036*77c1e3ccSAndroid Build Coastguard Worker           add(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1]))));   \
1037*77c1e3ccSAndroid Build Coastguard Worker     store(output + 26 * stride,                                                \
1038*77c1e3ccSAndroid Build Coastguard Worker           add(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1]))));   \
1039*77c1e3ccSAndroid Build Coastguard Worker     store(output + 27 * stride,                                                \
1040*77c1e3ccSAndroid Build Coastguard Worker           add(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1]))));   \
1041*77c1e3ccSAndroid Build Coastguard Worker     store(output + 28 * stride,                                                \
1042*77c1e3ccSAndroid Build Coastguard Worker           add(w87[0], mul(kWeight2, sub(w151[0], w151[1]))));                  \
1043*77c1e3ccSAndroid Build Coastguard Worker     store(output + 29 * stride,                                                \
1044*77c1e3ccSAndroid Build Coastguard Worker           add(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1]))));   \
1045*77c1e3ccSAndroid Build Coastguard Worker     store(output + 30 * stride,                                                \
1046*77c1e3ccSAndroid Build Coastguard Worker           add(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1]))));   \
1047*77c1e3ccSAndroid Build Coastguard Worker     store(output + 31 * stride,                                                \
1048*77c1e3ccSAndroid Build Coastguard Worker           add(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1]))));   \
1049*77c1e3ccSAndroid Build Coastguard Worker   }
1050*77c1e3ccSAndroid Build Coastguard Worker 
1051*77c1e3ccSAndroid Build Coastguard Worker #endif  // AOM_AOM_DSP_FFT_COMMON_H_
1052