/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_FFT_COMMON_H_
#define AOM_AOM_DSP_FFT_COMMON_H_

#ifdef __cplusplus
extern "C" {
#endif

/*!\brief A function pointer for computing 1d fft and ifft.
 *
 * The function will point to an implementation for a specific transform size,
 * and may perform the transforms using vectorized instructions.
 *
 * For a non-vectorized forward transform of size n, the input and output
 * buffers will be size n. The output takes advantage of conjugate symmetry and
 * packs the results as: [r_0, r_1, ..., r_{n/2}, i_1, ..., i_{n/2-1}], where
 * (r_{j}, i_{j}) is the complex output for index j.
 *
 * An inverse transform will assume that the complex "input" is packed
 * similarly. Its output will be real.
 *
 * Non-vectorized transforms (e.g., on a single row) would use a stride = 1.
 *
 * Vectorized implementations are parallelized along the columns so that the fft
 * can be performed on multiple columns at a time. In such cases the data block
 * for input and output is typically square (n x n) and the stride will
 * correspond to the spacing between rows. At minimum, the input size must be
 * n x simd_vector_length.
 *
 * \param[in]  input  Input buffer. See above for size restrictions.
 * \param[out] output Output buffer. See above for size restrictions.
 * \param[in]  stride The spacing in number of elements between rows
 *                    (or elements)
 */
typedef void (*aom_fft_1d_func_t)(const float *input, float *output,
                                  int stride);

// Declare some of the forward non-vectorized transforms which are used in some
// of the vectorized implementations. Each has the aom_fft_1d_func_t signature.
void aom_fft1d_2_float(const float *input, float *output, int stride);
void aom_fft1d_4_float(const float *input, float *output, int stride);
void aom_fft1d_8_float(const float *input, float *output, int stride);
void aom_fft1d_16_float(const float *input, float *output, int stride);
void aom_fft1d_32_float(const float *input, float *output, int stride);

/*!\brief Function pointer for transposing a matrix of floats.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_transpose_func_t)(const float *input, float *output,
                                         int n);

/*!\brief Function pointer for re-arranging intermediate 2d transform results.
 *
 * After re-arrangement, the real and imaginary components will be packed
 * tightly next to each other.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size 2 x n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_unpack_func_t)(const float *input, float *output, int n);

/*!\brief Performs a 2d fft with the given functions.
 *
 * This generator function allows for multiple different implementations of 2d
 * fft with different vector operations, without having to redefine the main
 * body multiple times.
 *
 * \param[in]  input     Input buffer to run the transform on (size n x n)
 * \param[out] temp      Working buffer for computing the transform (size n x n)
 * \param[out] output    Output buffer (size 2 x n x n)
 * \param[in]  n         Extent of one dimension of the square input block
 * \param[in]  tform     Forward transform function
 * \param[in]  transpose Transpose function (for n x n matrix)
 * \param[in]  unpack    Unpack function used to massage outputs to correct form
 * \param[in]  vec_size  Vector size (the transform is done vec_size units at
 *                       a time)
 */
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
                    aom_fft_unpack_func_t unpack, int vec_size);

/*!\brief Performs a 2d inverse fft with the given helper functions.
 *
 * \param[in]  input      Input buffer to run the transform on (size 2 x n x n)
 * \param[out] temp       Working buffer for computations (size 2 x n x n)
 * \param[out] output     Output buffer (size n x n)
 * \param[in]  n          Extent of one dimension of the square output block
 * \param[in]  fft_single Forward transform function (non vectorized)
 * \param[in]  fft_multi  Forward transform function (vectorized)
 * \param[in]  ifft_multi Inverse transform function (vectorized)
 * \param[in]  transpose  Transpose function (for n x n matrix)
 * \param[in]  vec_size   Vector size (the transform is done vec_size
 *                        units at a time)
 */
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
                     aom_fft_1d_func_t ifft_multi,
                     aom_fft_transpose_func_t transpose, int vec_size);
#ifdef __cplusplus
}
#endif

// The macros below define 1D fft/ifft for different data types and for
// different simd vector intrinsic types.
/*!\brief Generates a 2-point 1d transform named aom_fft1d_2_<suffix>.
 *
 * The generated function reads the two values at input and input + stride and
 * stores their sum at output and their difference at output + stride.
 * Unlike the larger generators below, the sum and difference are written with
 * the built-in + and - operators, so T_VEC must support those operators.
 *
 * \param ret     Return type of the generated function
 * \param suffix  Token pasted onto the end of the function name
 * \param T       Element type of the input and output pointers
 * \param T_VEC   Type of the values produced by load and consumed by store
 * \param load    load(ptr) yields the T_VEC read from ptr
 * \param store   store(ptr, v) writes the T_VEC v to ptr
 */
#define GEN_FFT_2(ret, suffix, T, T_VEC, load, store) \
  ret aom_fft1d_2_##suffix(const T *input, T *output, int stride) { \
    const T_VEC i0 = load(input + 0 * stride); \
    const T_VEC i1 = load(input + 1 * stride); \
    store(output + 0 * stride, i0 + i1); \
    store(output + 1 * stride, i0 - i1); \
  }

/*!\brief Generates a 4-point forward 1d transform named aom_fft1d_4_<suffix>.
 *
 * With x_k denoting the value loaded from input + k * stride, the generated
 * function stores, at output + k * stride for k = 0..3:
 *   (x_0 + x_2) + (x_1 + x_3),  x_0 - x_2,  (x_0 + x_2) - (x_1 + x_3),
 *   0 - (x_1 - x_3)
 * i.e. [r_0, r_1, r_2, i_1] in the packed layout documented for
 * aom_fft_1d_func_t. Negation is spelled sub(kWeight0, v), with kWeight0 built
 * from 0.0f, because only the operations passed in as arguments are used.
 *
 * ret, suffix, T, T_VEC, load and store are as for GEN_FFT_2.
 * \param constant  constant(c) yields a T_VEC built from the float literal c
 * \param add       add(a, b): expected to return the sum of two T_VEC values
 * \param sub       sub(a, b): expected to return a minus b for T_VEC values
 */
#define GEN_FFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) \
  ret aom_fft1d_4_##suffix(const T *input, T *output, int stride) { \
    const T_VEC kWeight0 = constant(0.0f); \
    const T_VEC i0 = load(input + 0 * stride); \
    const T_VEC i1 = load(input + 1 * stride); \
    const T_VEC i2 = load(input + 2 * stride); \
    const T_VEC i3 = load(input + 3 * stride); \
    const T_VEC w0 = add(i0, i2); \
    const T_VEC w1 = sub(i0, i2); \
    const T_VEC w2 = add(i1, i3); \
    const T_VEC w3 = sub(i1, i3); \
    store(output + 0 * stride, add(w0, w2)); \
    store(output + 1 * stride, w1); \
    store(output + 2 * stride, sub(w0, w2)); \
    store(output + 3 * stride, sub(kWeight0, w3)); \
  }

/*!\brief Generates an 8-point forward 1d transform named aom_fft1d_8_<suffix>.
 *
 * The eight values loaded from input + k * stride are combined with add, sub
 * and mul and stored to output + k * stride for k = 0..7. kWeight2 (0.707107f)
 * is sqrt(2)/2 rounded to six decimals.
 *
 * Parameters are as for GEN_FFT_4, plus:
 * \param mul  mul(a, b): expected to return the product of two T_VEC values
 */
#define GEN_FFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, mul) \
  ret aom_fft1d_8_##suffix(const T *input, T *output, int stride) { \
    const T_VEC kWeight0 = constant(0.0f); \
    const T_VEC kWeight2 = constant(0.707107f); \
    const T_VEC i0 = load(input + 0 * stride); \
    const T_VEC i1 = load(input + 1 * stride); \
    const T_VEC i2 = load(input + 2 * stride); \
    const T_VEC i3 = load(input + 3 * stride); \
    const T_VEC i4 = load(input + 4 * stride); \
    const T_VEC i5 = load(input + 5 * stride); \
    const T_VEC i6 = load(input + 6 * stride); \
    const T_VEC i7 = load(input + 7 * stride); \
    const T_VEC w0 = add(i0, i4); \
    const T_VEC w1 = sub(i0, i4); \
    const T_VEC w2 = add(i2, i6); \
    const T_VEC w3 = sub(i2, i6); \
    const T_VEC w4 = add(w0, w2); \
    const T_VEC w5 = sub(w0, w2); \
    const T_VEC w7 = add(i1, i5); \
    const T_VEC w8 = sub(i1, i5); \
    const T_VEC w9 = add(i3, i7); \
    const T_VEC w10 = sub(i3, i7); \
    const T_VEC w11 = add(w7, w9); \
    const T_VEC w12 = sub(w7, w9); \
    store(output + 0 * stride, add(w4, w11)); \
    store(output + 1 * stride, add(w1, mul(kWeight2, sub(w8, w10)))); \
    store(output + 2 * stride, w5); \
    store(output + 3 * stride, sub(w1, mul(kWeight2, sub(w8, w10)))); \
    store(output + 4 * stride, sub(w4, w11)); \
    store(output + 5 * stride, \
          sub(sub(kWeight0, w3), mul(kWeight2, add(w10, w8)))); \
    store(output + 6 * stride, sub(kWeight0, w12)); \
    store(output + 7 * stride, sub(w3, mul(kWeight2, add(w10, w8)))); \
  }

/*!\brief Generates a 16-point forward 1d transform named aom_fft1d_16_<suffix>.
 *
 * w0..w18 repeat the GEN_FFT_8 arithmetic on the even-indexed inputs and
 * w19..w37 repeat it on the odd-indexed inputs; the two-element arrays
 * (w16, w18, w35, w37) hold the expressions that GEN_FFT_8 stores at output
 * indices {1, 5} and {3, 7}. The stores then combine the two halves using
 * kWeight2 (0.707107f), kWeight3 (0.92388f, numerically cos(pi/8)) and
 * kWeight4 (0.382683f, numerically sin(pi/8)), writing one value to
 * output + k * stride for each k = 0..15.
 *
 * Parameters are as for GEN_FFT_8.
 */
#define GEN_FFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
                   mul) \
  ret aom_fft1d_16_##suffix(const T *input, T *output, int stride) { \
    const T_VEC kWeight0 = constant(0.0f); \
    const T_VEC kWeight2 = constant(0.707107f); \
    const T_VEC kWeight3 = constant(0.92388f); \
    const T_VEC kWeight4 = constant(0.382683f); \
    const T_VEC i0 = load(input + 0 * stride); \
    const T_VEC i1 = load(input + 1 * stride); \
    const T_VEC i2 = load(input + 2 * stride); \
    const T_VEC i3 = load(input + 3 * stride); \
    const T_VEC i4 = load(input + 4 * stride); \
    const T_VEC i5 = load(input + 5 * stride); \
    const T_VEC i6 = load(input + 6 * stride); \
    const T_VEC i7 = load(input + 7 * stride); \
    const T_VEC i8 = load(input + 8 * stride); \
    const T_VEC i9 = load(input + 9 * stride); \
    const T_VEC i10 = load(input + 10 * stride); \
    const T_VEC i11 = load(input + 11 * stride); \
    const T_VEC i12 = load(input + 12 * stride); \
    const T_VEC i13 = load(input + 13 * stride); \
    const T_VEC i14 = load(input + 14 * stride); \
    const T_VEC i15 = load(input + 15 * stride); \
    const T_VEC w0 = add(i0, i8); \
    const T_VEC w1 = sub(i0, i8); \
    const T_VEC w2 = add(i4, i12); \
    const T_VEC w3 = sub(i4, i12); \
    const T_VEC w4 = add(w0, w2); \
    const T_VEC w5 = sub(w0, w2); \
    const T_VEC w7 = add(i2, i10); \
    const T_VEC w8 = sub(i2, i10); \
    const T_VEC w9 = add(i6, i14); \
    const T_VEC w10 = sub(i6, i14); \
    const T_VEC w11 = add(w7, w9); \
    const T_VEC w12 = sub(w7, w9); \
    const T_VEC w14 = add(w4, w11); \
    const T_VEC w15 = sub(w4, w11); \
    const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))), \
                           sub(sub(kWeight0, w3), \
                               mul(kWeight2, add(w10, w8))) }; \
    const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))), \
                           sub(w3, mul(kWeight2, add(w10, w8))) }; \
    const T_VEC w19 = add(i1, i9); \
    const T_VEC w20 = sub(i1, i9); \
    const T_VEC w21 = add(i5, i13); \
    const T_VEC w22 = sub(i5, i13); \
    const T_VEC w23 = add(w19, w21); \
    const T_VEC w24 = sub(w19, w21); \
    const T_VEC w26 = add(i3, i11); \
    const T_VEC w27 = sub(i3, i11); \
    const T_VEC w28 = add(i7, i15); \
    const T_VEC w29 = sub(i7, i15); \
    const T_VEC w30 = add(w26, w28); \
    const T_VEC w31 = sub(w26, w28); \
    const T_VEC w33 = add(w23, w30); \
    const T_VEC w34 = sub(w23, w30); \
    const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))), \
                           sub(sub(kWeight0, w22), \
                               mul(kWeight2, add(w29, w27))) }; \
    const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))), \
                           sub(w22, mul(kWeight2, add(w29, w27))) }; \
    store(output + 0 * stride, add(w14, w33)); \
    store(output + 1 * stride, \
          add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1])))); \
    store(output + 2 * stride, add(w5, mul(kWeight2, sub(w24, w31)))); \
    store(output + 3 * stride, \
          add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1])))); \
    store(output + 4 * stride, w15); \
    store(output + 5 * stride, \
          add(w18[0], sub(sub(kWeight0, mul(kWeight4, w37[0])), \
                          mul(kWeight3, w37[1])))); \
    store(output + 6 * stride, sub(w5, mul(kWeight2, sub(w24, w31)))); \
    store(output + 7 * stride, \
          add(w16[0], sub(sub(kWeight0, mul(kWeight3, w35[0])), \
                          mul(kWeight4, w35[1])))); \
    store(output + 8 * stride, sub(w14, w33)); \
    store(output + 9 * stride, \
          add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0])))); \
    store(output + 10 * stride, \
          sub(sub(kWeight0, w12), mul(kWeight2, add(w31, w24)))); \
    store(output + 11 * stride, \
          add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0])))); \
    store(output + 12 * stride, sub(kWeight0, w34)); \
    store(output + 13 * stride, \
          sub(sub(kWeight0, w18[1]), \
              sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1])))); \
    store(output + 14 * stride, sub(w12, mul(kWeight2, add(w31, w24)))); \
    store(output + 15 * stride, \
          sub(sub(kWeight0, w16[1]), \
              sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1])))); \
  }

#define GEN_FFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
                   mul) \
  ret aom_fft1d_32_##suffix(const T *input, T *output, int stride) { \
    const T_VEC kWeight0 = constant(0.0f); \
    const T_VEC kWeight2 = constant(0.707107f); \
    const T_VEC kWeight3 = constant(0.92388f); \
    const T_VEC kWeight4 = constant(0.382683f); \
    const T_VEC kWeight5 = constant(0.980785f); \
    const T_VEC kWeight6 = constant(0.19509f); \
    const T_VEC kWeight7 = \
constant(0.83147f); \ 280*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight8 = constant(0.55557f); \ 281*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 282*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 283*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i2 = load(input + 2 * stride); \ 284*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i3 = load(input + 3 * stride); \ 285*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i4 = load(input + 4 * stride); \ 286*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i5 = load(input + 5 * stride); \ 287*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i6 = load(input + 6 * stride); \ 288*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i7 = load(input + 7 * stride); \ 289*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i8 = load(input + 8 * stride); \ 290*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i9 = load(input + 9 * stride); \ 291*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i10 = load(input + 10 * stride); \ 292*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i11 = load(input + 11 * stride); \ 293*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i12 = load(input + 12 * stride); \ 294*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i13 = load(input + 13 * stride); \ 295*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i14 = load(input + 14 * stride); \ 296*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i15 = load(input + 15 * stride); \ 297*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i16 = load(input + 16 * stride); \ 298*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i17 = load(input + 17 * stride); \ 299*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i18 = load(input + 18 * stride); \ 300*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i19 = load(input + 19 * stride); \ 301*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i20 = 
load(input + 20 * stride); \ 302*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i21 = load(input + 21 * stride); \ 303*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i22 = load(input + 22 * stride); \ 304*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i23 = load(input + 23 * stride); \ 305*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i24 = load(input + 24 * stride); \ 306*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i25 = load(input + 25 * stride); \ 307*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i26 = load(input + 26 * stride); \ 308*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i27 = load(input + 27 * stride); \ 309*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i28 = load(input + 28 * stride); \ 310*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i29 = load(input + 29 * stride); \ 311*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i30 = load(input + 30 * stride); \ 312*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i31 = load(input + 31 * stride); \ 313*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w0 = add(i0, i16); \ 314*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w1 = sub(i0, i16); \ 315*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w2 = add(i8, i24); \ 316*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w3 = sub(i8, i24); \ 317*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w4 = add(w0, w2); \ 318*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w5 = sub(w0, w2); \ 319*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w7 = add(i4, i20); \ 320*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w8 = sub(i4, i20); \ 321*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w9 = add(i12, i28); \ 322*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w10 = sub(i12, i28); \ 323*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w11 = add(w7, w9); \ 324*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w12 = sub(w7, w9); \ 325*77c1e3ccSAndroid 
Build Coastguard Worker const T_VEC w14 = add(w4, w11); \ 326*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w15 = sub(w4, w11); \ 327*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))), \ 328*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w3), \ 329*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w10, w8))) }; \ 330*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))), \ 331*77c1e3ccSAndroid Build Coastguard Worker sub(w3, mul(kWeight2, add(w10, w8))) }; \ 332*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w19 = add(i2, i18); \ 333*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w20 = sub(i2, i18); \ 334*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w21 = add(i10, i26); \ 335*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w22 = sub(i10, i26); \ 336*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w23 = add(w19, w21); \ 337*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w24 = sub(w19, w21); \ 338*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w26 = add(i6, i22); \ 339*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w27 = sub(i6, i22); \ 340*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w28 = add(i14, i30); \ 341*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w29 = sub(i14, i30); \ 342*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w30 = add(w26, w28); \ 343*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w31 = sub(w26, w28); \ 344*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w33 = add(w23, w30); \ 345*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w34 = sub(w23, w30); \ 346*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))), \ 347*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w22), \ 348*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w29, w27))) }; \ 349*77c1e3ccSAndroid Build 
Coastguard Worker const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))), \ 350*77c1e3ccSAndroid Build Coastguard Worker sub(w22, mul(kWeight2, add(w29, w27))) }; \ 351*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w38 = add(w14, w33); \ 352*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w39 = sub(w14, w33); \ 353*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w40[2] = { \ 354*77c1e3ccSAndroid Build Coastguard Worker add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1]))), \ 355*77c1e3ccSAndroid Build Coastguard Worker add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0]))) \ 356*77c1e3ccSAndroid Build Coastguard Worker }; \ 357*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w41[2] = { add(w5, mul(kWeight2, sub(w24, w31))), \ 358*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w12), \ 359*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w31, w24))) }; \ 360*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w42[2] = { \ 361*77c1e3ccSAndroid Build Coastguard Worker add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1]))), \ 362*77c1e3ccSAndroid Build Coastguard Worker add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0]))) \ 363*77c1e3ccSAndroid Build Coastguard Worker }; \ 364*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w44[2] = { \ 365*77c1e3ccSAndroid Build Coastguard Worker add(w18[0], \ 366*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w37[0])), mul(kWeight3, w37[1]))), \ 367*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w18[1]), \ 368*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1]))) \ 369*77c1e3ccSAndroid Build Coastguard Worker }; \ 370*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w45[2] = { sub(w5, mul(kWeight2, sub(w24, w31))), \ 371*77c1e3ccSAndroid Build Coastguard Worker sub(w12, mul(kWeight2, add(w31, w24))) }; \ 372*77c1e3ccSAndroid Build Coastguard Worker const T_VEC 
w46[2] = { \ 373*77c1e3ccSAndroid Build Coastguard Worker add(w16[0], \ 374*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w35[0])), mul(kWeight4, w35[1]))), \ 375*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w16[1]), \ 376*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1]))) \ 377*77c1e3ccSAndroid Build Coastguard Worker }; \ 378*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w47 = add(i1, i17); \ 379*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w48 = sub(i1, i17); \ 380*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w49 = add(i9, i25); \ 381*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w50 = sub(i9, i25); \ 382*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w51 = add(w47, w49); \ 383*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w52 = sub(w47, w49); \ 384*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w54 = add(i5, i21); \ 385*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w55 = sub(i5, i21); \ 386*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w56 = add(i13, i29); \ 387*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w57 = sub(i13, i29); \ 388*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w58 = add(w54, w56); \ 389*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w59 = sub(w54, w56); \ 390*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w61 = add(w51, w58); \ 391*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w62 = sub(w51, w58); \ 392*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w63[2] = { add(w48, mul(kWeight2, sub(w55, w57))), \ 393*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w50), \ 394*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w57, w55))) }; \ 395*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w65[2] = { sub(w48, mul(kWeight2, sub(w55, w57))), \ 396*77c1e3ccSAndroid Build Coastguard Worker sub(w50, mul(kWeight2, add(w57, w55))) }; \ 
397*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w66 = add(i3, i19); \ 398*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w67 = sub(i3, i19); \ 399*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w68 = add(i11, i27); \ 400*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w69 = sub(i11, i27); \ 401*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w70 = add(w66, w68); \ 402*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w71 = sub(w66, w68); \ 403*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w73 = add(i7, i23); \ 404*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w74 = sub(i7, i23); \ 405*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w75 = add(i15, i31); \ 406*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w76 = sub(i15, i31); \ 407*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w77 = add(w73, w75); \ 408*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w78 = sub(w73, w75); \ 409*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w80 = add(w70, w77); \ 410*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w81 = sub(w70, w77); \ 411*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w82[2] = { add(w67, mul(kWeight2, sub(w74, w76))), \ 412*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w69), \ 413*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w76, w74))) }; \ 414*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w84[2] = { sub(w67, mul(kWeight2, sub(w74, w76))), \ 415*77c1e3ccSAndroid Build Coastguard Worker sub(w69, mul(kWeight2, add(w76, w74))) }; \ 416*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w85 = add(w61, w80); \ 417*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w86 = sub(w61, w80); \ 418*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w87[2] = { \ 419*77c1e3ccSAndroid Build Coastguard Worker add(w63[0], add(mul(kWeight3, w82[0]), mul(kWeight4, w82[1]))), \ 420*77c1e3ccSAndroid Build Coastguard Worker add(w63[1], sub(mul(kWeight3, 
w82[1]), mul(kWeight4, w82[0]))) \ 421*77c1e3ccSAndroid Build Coastguard Worker }; \ 422*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w88[2] = { add(w52, mul(kWeight2, sub(w71, w78))), \ 423*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w59), \ 424*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, add(w78, w71))) }; \ 425*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w89[2] = { \ 426*77c1e3ccSAndroid Build Coastguard Worker add(w65[0], add(mul(kWeight4, w84[0]), mul(kWeight3, w84[1]))), \ 427*77c1e3ccSAndroid Build Coastguard Worker add(w65[1], sub(mul(kWeight4, w84[1]), mul(kWeight3, w84[0]))) \ 428*77c1e3ccSAndroid Build Coastguard Worker }; \ 429*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w91[2] = { \ 430*77c1e3ccSAndroid Build Coastguard Worker add(w65[0], \ 431*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w84[0])), mul(kWeight3, w84[1]))), \ 432*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w65[1]), \ 433*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight3, w84[0]), mul(kWeight4, w84[1]))) \ 434*77c1e3ccSAndroid Build Coastguard Worker }; \ 435*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w92[2] = { sub(w52, mul(kWeight2, sub(w71, w78))), \ 436*77c1e3ccSAndroid Build Coastguard Worker sub(w59, mul(kWeight2, add(w78, w71))) }; \ 437*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w93[2] = { \ 438*77c1e3ccSAndroid Build Coastguard Worker add(w63[0], \ 439*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w82[0])), mul(kWeight4, w82[1]))), \ 440*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w63[1]), \ 441*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight4, w82[0]), mul(kWeight3, w82[1]))) \ 442*77c1e3ccSAndroid Build Coastguard Worker }; \ 443*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, add(w38, w85)); \ 444*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, \ 
445*77c1e3ccSAndroid Build Coastguard Worker add(w40[0], add(mul(kWeight5, w87[0]), mul(kWeight6, w87[1])))); \ 446*77c1e3ccSAndroid Build Coastguard Worker store(output + 2 * stride, \ 447*77c1e3ccSAndroid Build Coastguard Worker add(w41[0], add(mul(kWeight3, w88[0]), mul(kWeight4, w88[1])))); \ 448*77c1e3ccSAndroid Build Coastguard Worker store(output + 3 * stride, \ 449*77c1e3ccSAndroid Build Coastguard Worker add(w42[0], add(mul(kWeight7, w89[0]), mul(kWeight8, w89[1])))); \ 450*77c1e3ccSAndroid Build Coastguard Worker store(output + 4 * stride, add(w15, mul(kWeight2, sub(w62, w81)))); \ 451*77c1e3ccSAndroid Build Coastguard Worker store(output + 5 * stride, \ 452*77c1e3ccSAndroid Build Coastguard Worker add(w44[0], add(mul(kWeight8, w91[0]), mul(kWeight7, w91[1])))); \ 453*77c1e3ccSAndroid Build Coastguard Worker store(output + 6 * stride, \ 454*77c1e3ccSAndroid Build Coastguard Worker add(w45[0], add(mul(kWeight4, w92[0]), mul(kWeight3, w92[1])))); \ 455*77c1e3ccSAndroid Build Coastguard Worker store(output + 7 * stride, \ 456*77c1e3ccSAndroid Build Coastguard Worker add(w46[0], add(mul(kWeight6, w93[0]), mul(kWeight5, w93[1])))); \ 457*77c1e3ccSAndroid Build Coastguard Worker store(output + 8 * stride, w39); \ 458*77c1e3ccSAndroid Build Coastguard Worker store(output + 9 * stride, \ 459*77c1e3ccSAndroid Build Coastguard Worker add(w46[0], sub(sub(kWeight0, mul(kWeight6, w93[0])), \ 460*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight5, w93[1])))); \ 461*77c1e3ccSAndroid Build Coastguard Worker store(output + 10 * stride, \ 462*77c1e3ccSAndroid Build Coastguard Worker add(w45[0], sub(sub(kWeight0, mul(kWeight4, w92[0])), \ 463*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight3, w92[1])))); \ 464*77c1e3ccSAndroid Build Coastguard Worker store(output + 11 * stride, \ 465*77c1e3ccSAndroid Build Coastguard Worker add(w44[0], sub(sub(kWeight0, mul(kWeight8, w91[0])), \ 466*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight7, w91[1])))); \ 
467*77c1e3ccSAndroid Build Coastguard Worker store(output + 12 * stride, sub(w15, mul(kWeight2, sub(w62, w81)))); \ 468*77c1e3ccSAndroid Build Coastguard Worker store(output + 13 * stride, \ 469*77c1e3ccSAndroid Build Coastguard Worker add(w42[0], sub(sub(kWeight0, mul(kWeight7, w89[0])), \ 470*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight8, w89[1])))); \ 471*77c1e3ccSAndroid Build Coastguard Worker store(output + 14 * stride, \ 472*77c1e3ccSAndroid Build Coastguard Worker add(w41[0], sub(sub(kWeight0, mul(kWeight3, w88[0])), \ 473*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight4, w88[1])))); \ 474*77c1e3ccSAndroid Build Coastguard Worker store(output + 15 * stride, \ 475*77c1e3ccSAndroid Build Coastguard Worker add(w40[0], sub(sub(kWeight0, mul(kWeight5, w87[0])), \ 476*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight6, w87[1])))); \ 477*77c1e3ccSAndroid Build Coastguard Worker store(output + 16 * stride, sub(w38, w85)); \ 478*77c1e3ccSAndroid Build Coastguard Worker store(output + 17 * stride, \ 479*77c1e3ccSAndroid Build Coastguard Worker add(w40[1], sub(mul(kWeight5, w87[1]), mul(kWeight6, w87[0])))); \ 480*77c1e3ccSAndroid Build Coastguard Worker store(output + 18 * stride, \ 481*77c1e3ccSAndroid Build Coastguard Worker add(w41[1], sub(mul(kWeight3, w88[1]), mul(kWeight4, w88[0])))); \ 482*77c1e3ccSAndroid Build Coastguard Worker store(output + 19 * stride, \ 483*77c1e3ccSAndroid Build Coastguard Worker add(w42[1], sub(mul(kWeight7, w89[1]), mul(kWeight8, w89[0])))); \ 484*77c1e3ccSAndroid Build Coastguard Worker store(output + 20 * stride, \ 485*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w34), mul(kWeight2, add(w81, w62)))); \ 486*77c1e3ccSAndroid Build Coastguard Worker store(output + 21 * stride, \ 487*77c1e3ccSAndroid Build Coastguard Worker add(w44[1], sub(mul(kWeight8, w91[1]), mul(kWeight7, w91[0])))); \ 488*77c1e3ccSAndroid Build Coastguard Worker store(output + 22 * stride, \ 489*77c1e3ccSAndroid Build Coastguard Worker 
add(w45[1], sub(mul(kWeight4, w92[1]), mul(kWeight3, w92[0])))); \ 490*77c1e3ccSAndroid Build Coastguard Worker store(output + 23 * stride, \ 491*77c1e3ccSAndroid Build Coastguard Worker add(w46[1], sub(mul(kWeight6, w93[1]), mul(kWeight5, w93[0])))); \ 492*77c1e3ccSAndroid Build Coastguard Worker store(output + 24 * stride, sub(kWeight0, w86)); \ 493*77c1e3ccSAndroid Build Coastguard Worker store(output + 25 * stride, \ 494*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w46[1]), \ 495*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight5, w93[0]), mul(kWeight6, w93[1])))); \ 496*77c1e3ccSAndroid Build Coastguard Worker store(output + 26 * stride, \ 497*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w45[1]), \ 498*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight3, w92[0]), mul(kWeight4, w92[1])))); \ 499*77c1e3ccSAndroid Build Coastguard Worker store(output + 27 * stride, \ 500*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w44[1]), \ 501*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight7, w91[0]), mul(kWeight8, w91[1])))); \ 502*77c1e3ccSAndroid Build Coastguard Worker store(output + 28 * stride, sub(w34, mul(kWeight2, add(w81, w62)))); \ 503*77c1e3ccSAndroid Build Coastguard Worker store(output + 29 * stride, \ 504*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w42[1]), \ 505*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight8, w89[0]), mul(kWeight7, w89[1])))); \ 506*77c1e3ccSAndroid Build Coastguard Worker store(output + 30 * stride, \ 507*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w41[1]), \ 508*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight4, w88[0]), mul(kWeight3, w88[1])))); \ 509*77c1e3ccSAndroid Build Coastguard Worker store(output + 31 * stride, \ 510*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, w40[1]), \ 511*77c1e3ccSAndroid Build Coastguard Worker sub(mul(kWeight6, w87[0]), mul(kWeight5, w87[1])))); \ 512*77c1e3ccSAndroid Build Coastguard 
Worker } 513*77c1e3ccSAndroid Build Coastguard Worker 514*77c1e3ccSAndroid Build Coastguard Worker /* GEN_IFFT_2: expands to the definition of aom_ifft1d_2_<suffix>(input, output, stride), the 2-point inverse-transform kernel. It loads row 0 and row 1 of input (row k lives at input + k * stride) and stores their sum to output row 0 and their difference to output row 1. Unlike the larger sizes it takes no add/sub arguments and applies the + and - operators directly to T_VEC values. */ #define GEN_IFFT_2(ret, suffix, T, T_VEC, load, store) \ 515*77c1e3ccSAndroid Build Coastguard Worker ret aom_ifft1d_2_##suffix(const T *input, T *output, int stride) { \ 516*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 517*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 518*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, i0 + i1); \ 519*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, i0 - i1); \ 520*77c1e3ccSAndroid Build Coastguard Worker } 521*77c1e3ccSAndroid Build Coastguard Worker 522*77c1e3ccSAndroid Build Coastguard Worker /* GEN_IFFT_4: expands to the definition of aom_ifft1d_4_<suffix>(input, output, stride), the 4-point inverse-transform kernel, built only from the load/store/constant/add/sub operations passed as macro arguments. With rows i0..i3 loaded from input + k * stride, the four stores reduce to: out0 = (i0 + i2) + (i1 + i1), out1 = (i0 - i2) - (i3 + i3), out2 = (i0 + i2) - (i1 + i1), out3 = (i0 - i2) + (i3 + i3). kWeight0 is a zero vector used to negate via sub(kWeight0, x). w4[1] and w5[0] are computed but never stored. No scaling factor is applied to the outputs. */ #define GEN_IFFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) \ 523*77c1e3ccSAndroid Build Coastguard Worker ret aom_ifft1d_4_##suffix(const T *input, T *output, int stride) { \ 524*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight0 = constant(0.0f); \ 525*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 526*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 527*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i2 = load(input + 2 * stride); \ 528*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i3 = load(input + 3 * stride); \ 529*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w2 = add(i0, i2); \ 530*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w3 = sub(i0, i2); \ 531*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w4[2] = { add(i1, i1), sub(i3, i3) }; \ 532*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w5[2] = { sub(i1, i1), sub(sub(kWeight0, i3), i3) }; \ 533*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, add(w2, w4[0])); \ 534*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, add(w3, w5[1])); \ 535*77c1e3ccSAndroid Build Coastguard Worker store(output + 2
* stride, sub(w2, w4[0])); \ 536*77c1e3ccSAndroid Build Coastguard Worker store(output + 3 * stride, sub(w3, w5[1])); \ 537*77c1e3ccSAndroid Build Coastguard Worker } 538*77c1e3ccSAndroid Build Coastguard Worker 539*77c1e3ccSAndroid Build Coastguard Worker /* GEN_IFFT_8: expands to the definition of aom_ifft1d_8_<suffix>(input, output, stride), the 8-point inverse-transform kernel. It loads eight rows (row k at input + k * stride), combines them through a fixed sequence of the add/sub/mul operations passed as macro arguments, and stores eight output rows at output + k * stride. kWeight0 is a zero vector used to negate via sub(kWeight0, x); kWeight2 is 0.707107f, numerically sqrt(2)/2, and is the only multiplier used. NOTE(review): the two-element w* arrays presumably hold a {real, imaginary} pair - confirm against the code generator. */ #define GEN_IFFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, \ 540*77c1e3ccSAndroid Build Coastguard Worker mul) \ 541*77c1e3ccSAndroid Build Coastguard Worker ret aom_ifft1d_8_##suffix(const T *input, T *output, int stride) { \ 542*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight0 = constant(0.0f); \ 543*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight2 = constant(0.707107f); \ 544*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 545*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 546*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i2 = load(input + 2 * stride); \ 547*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i3 = load(input + 3 * stride); \ 548*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i4 = load(input + 4 * stride); \ 549*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i5 = load(input + 5 * stride); \ 550*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i6 = load(input + 6 * stride); \ 551*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i7 = load(input + 7 * stride); \ 552*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w6 = add(i0, i4); \ 553*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w7 = sub(i0, i4); \ 554*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w8[2] = { add(i2, i2), sub(i6, i6) }; \ 555*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w9[2] = { sub(i2, i2), sub(sub(kWeight0, i6), i6) }; \ 556*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w10[2] = { add(w6, w8[0]), w8[1] }; \ 557*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w11[2] = { sub(w6, w8[0]), sub(kWeight0, w8[1]) }; \ 558*77c1e3ccSAndroid Build
Coastguard Worker const T_VEC w12[2] = { add(w7, w9[1]), sub(kWeight0, w9[0]) }; \ 559*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w13[2] = { sub(w7, w9[1]), w9[0] }; \ 560*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w14[2] = { add(i1, i3), sub(i7, i5) }; \ 561*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w15[2] = { sub(i1, i3), sub(sub(kWeight0, i5), i7) }; \ 562*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w16[2] = { add(i3, i1), sub(i5, i7) }; \ 563*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w17[2] = { sub(i3, i1), sub(sub(kWeight0, i7), i5) }; \ 564*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w18[2] = { add(w14[0], w16[0]), add(w14[1], w16[1]) }; \ 565*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w19[2] = { sub(w14[0], w16[0]), sub(w14[1], w16[1]) }; \ 566*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w20[2] = { add(w15[0], w17[1]), sub(w15[1], w17[0]) }; \ 567*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w21[2] = { sub(w15[0], w17[1]), add(w15[1], w17[0]) }; \ 568*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, add(w10[0], w18[0])); \ 569*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, \ 570*77c1e3ccSAndroid Build Coastguard Worker add(w12[0], mul(kWeight2, add(w20[0], w20[1])))); \ 571*77c1e3ccSAndroid Build Coastguard Worker store(output + 2 * stride, add(w11[0], w19[1])); \ 572*77c1e3ccSAndroid Build Coastguard Worker store(output + 3 * stride, \ 573*77c1e3ccSAndroid Build Coastguard Worker sub(w13[0], mul(kWeight2, sub(w21[0], w21[1])))); \ 574*77c1e3ccSAndroid Build Coastguard Worker store(output + 4 * stride, sub(w10[0], w18[0])); \ 575*77c1e3ccSAndroid Build Coastguard Worker store(output + 5 * stride, \ 576*77c1e3ccSAndroid Build Coastguard Worker add(w12[0], sub(sub(kWeight0, mul(kWeight2, w20[0])), \ 577*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w20[1])))); \ 578*77c1e3ccSAndroid Build Coastguard Worker store(output + 6 *
stride, sub(w11[0], w19[1])); \ 579*77c1e3ccSAndroid Build Coastguard Worker store(output + 7 * stride, \ 580*77c1e3ccSAndroid Build Coastguard Worker add(w13[0], mul(kWeight2, sub(w21[0], w21[1])))); \ 581*77c1e3ccSAndroid Build Coastguard Worker } 582*77c1e3ccSAndroid Build Coastguard Worker 583*77c1e3ccSAndroid Build Coastguard Worker /* GEN_IFFT_16: expands to the definition of aom_ifft1d_16_<suffix>(input, output, stride), the 16-point inverse-transform kernel. It loads sixteen rows (row k at input + k * stride), runs a fixed network of add/sub/mul steps held in the w* temporaries, and stores sixteen output rows at output + k * stride. Constants: kWeight0 is a zero vector used to negate via sub(kWeight0, x); kWeight2 = 0.707107f (numerically sqrt(2)/2); kWeight3 = 0.92388f and kWeight4 = 0.382683f (numerically cos(pi/8) and sin(pi/8)). NOTE(review): the two-element w* arrays presumably hold a {real, imaginary} pair - confirm against the code generator. */ #define GEN_IFFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \ 584*77c1e3ccSAndroid Build Coastguard Worker mul) \ 585*77c1e3ccSAndroid Build Coastguard Worker ret aom_ifft1d_16_##suffix(const T *input, T *output, int stride) { \ 586*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight0 = constant(0.0f); \ 587*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight2 = constant(0.707107f); \ 588*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight3 = constant(0.92388f); \ 589*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight4 = constant(0.382683f); \ 590*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 591*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 592*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i2 = load(input + 2 * stride); \ 593*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i3 = load(input + 3 * stride); \ 594*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i4 = load(input + 4 * stride); \ 595*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i5 = load(input + 5 * stride); \ 596*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i6 = load(input + 6 * stride); \ 597*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i7 = load(input + 7 * stride); \ 598*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i8 = load(input + 8 * stride); \ 599*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i9 = load(input + 9 * stride); \ 600*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i10 = load(input + 10 * stride); \ 601*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i11 =
load(input + 11 * stride); \ 602*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i12 = load(input + 12 * stride); \ 603*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i13 = load(input + 13 * stride); \ 604*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i14 = load(input + 14 * stride); \ 605*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i15 = load(input + 15 * stride); \ 606*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w14 = add(i0, i8); \ 607*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w15 = sub(i0, i8); \ 608*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w16[2] = { add(i4, i4), sub(i12, i12) }; \ 609*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w17[2] = { sub(i4, i4), sub(sub(kWeight0, i12), i12) }; \ 610*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w18[2] = { add(w14, w16[0]), w16[1] }; \ 611*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w19[2] = { sub(w14, w16[0]), sub(kWeight0, w16[1]) }; \ 612*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w20[2] = { add(w15, w17[1]), sub(kWeight0, w17[0]) }; \ 613*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w21[2] = { sub(w15, w17[1]), w17[0] }; \ 614*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w22[2] = { add(i2, i6), sub(i14, i10) }; \ 615*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w23[2] = { sub(i2, i6), sub(sub(kWeight0, i10), i14) }; \ 616*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w24[2] = { add(i6, i2), sub(i10, i14) }; \ 617*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w25[2] = { sub(i6, i2), sub(sub(kWeight0, i14), i10) }; \ 618*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w26[2] = { add(w22[0], w24[0]), add(w22[1], w24[1]) }; \ 619*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w27[2] = { sub(w22[0], w24[0]), sub(w22[1], w24[1]) }; \ 620*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w28[2] = { add(w23[0], w25[1]), sub(w23[1], w25[0]) }; \ 621*77c1e3ccSAndroid Build Coastguard
Worker const T_VEC w29[2] = { sub(w23[0], w25[1]), add(w23[1], w25[0]) }; \ 622*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w30[2] = { add(w18[0], w26[0]), add(w18[1], w26[1]) }; \ 623*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w31[2] = { sub(w18[0], w26[0]), sub(w18[1], w26[1]) }; \ 624*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w32[2] = { add(w20[0], mul(kWeight2, add(w28[0], w28[1]))), \ 625*77c1e3ccSAndroid Build Coastguard Worker add(w20[1], mul(kWeight2, sub(w28[1], w28[0]))) }; \ 626*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w33[2] = { add(w20[0], \ 627*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w28[0])), \ 628*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w28[1]))), \ 629*77c1e3ccSAndroid Build Coastguard Worker add(w20[1], mul(kWeight2, sub(w28[0], w28[1]))) }; \ 630*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w34[2] = { add(w19[0], w27[1]), sub(w19[1], w27[0]) }; \ 631*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w35[2] = { sub(w19[0], w27[1]), add(w19[1], w27[0]) }; \ 632*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w36[2] = { sub(w21[0], mul(kWeight2, sub(w29[0], w29[1]))), \ 633*77c1e3ccSAndroid Build Coastguard Worker sub(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \ 634*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w37[2] = { add(w21[0], mul(kWeight2, sub(w29[0], w29[1]))), \ 635*77c1e3ccSAndroid Build Coastguard Worker add(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \ 636*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w38[2] = { add(i1, i7), sub(i15, i9) }; \ 637*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w39[2] = { sub(i1, i7), sub(sub(kWeight0, i9), i15) }; \ 638*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w40[2] = { add(i5, i3), sub(i11, i13) }; \ 639*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w41[2] = { sub(i5, i3), sub(sub(kWeight0, i13), i11) }; \ 640*77c1e3ccSAndroid Build Coastguard Worker
const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) }; \ 641*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) }; \ 642*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) }; \ 643*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) }; \ 644*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w46[2] = { add(i3, i5), sub(i13, i11) }; \ 645*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w47[2] = { sub(i3, i5), sub(sub(kWeight0, i11), i13) }; \ 646*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w48[2] = { add(i7, i1), sub(i9, i15) }; \ 647*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w49[2] = { sub(i7, i1), sub(sub(kWeight0, i15), i9) }; \ 648*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w50[2] = { add(w46[0], w48[0]), add(w46[1], w48[1]) }; \ 649*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w51[2] = { sub(w46[0], w48[0]), sub(w46[1], w48[1]) }; \ 650*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w52[2] = { add(w47[0], w49[1]), sub(w47[1], w49[0]) }; \ 651*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w53[2] = { sub(w47[0], w49[1]), add(w47[1], w49[0]) }; \ 652*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w54[2] = { add(w42[0], w50[0]), add(w42[1], w50[1]) }; \ 653*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w55[2] = { sub(w42[0], w50[0]), sub(w42[1], w50[1]) }; \ 654*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w56[2] = { add(w44[0], mul(kWeight2, add(w52[0], w52[1]))), \ 655*77c1e3ccSAndroid Build Coastguard Worker add(w44[1], mul(kWeight2, sub(w52[1], w52[0]))) }; \ 656*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w57[2] = { add(w44[0], \ 657*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w52[0])), \ 658*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w52[1]))), \
659*77c1e3ccSAndroid Build Coastguard Worker add(w44[1], mul(kWeight2, sub(w52[0], w52[1]))) }; \ 660*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w58[2] = { add(w43[0], w51[1]), sub(w43[1], w51[0]) }; \ 661*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w59[2] = { sub(w43[0], w51[1]), add(w43[1], w51[0]) }; \ 662*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w60[2] = { sub(w45[0], mul(kWeight2, sub(w53[0], w53[1]))), \ 663*77c1e3ccSAndroid Build Coastguard Worker sub(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \ 664*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w61[2] = { add(w45[0], mul(kWeight2, sub(w53[0], w53[1]))), \ 665*77c1e3ccSAndroid Build Coastguard Worker add(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \ 666*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, add(w30[0], w54[0])); \ 667*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, \ 668*77c1e3ccSAndroid Build Coastguard Worker add(w32[0], add(mul(kWeight3, w56[0]), mul(kWeight4, w56[1])))); \ 669*77c1e3ccSAndroid Build Coastguard Worker store(output + 2 * stride, \ 670*77c1e3ccSAndroid Build Coastguard Worker add(w34[0], mul(kWeight2, add(w58[0], w58[1])))); \ 671*77c1e3ccSAndroid Build Coastguard Worker store(output + 3 * stride, \ 672*77c1e3ccSAndroid Build Coastguard Worker add(w36[0], add(mul(kWeight4, w60[0]), mul(kWeight3, w60[1])))); \ 673*77c1e3ccSAndroid Build Coastguard Worker store(output + 4 * stride, add(w31[0], w55[1])); \ 674*77c1e3ccSAndroid Build Coastguard Worker store(output + 5 * stride, \ 675*77c1e3ccSAndroid Build Coastguard Worker sub(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1])))); \ 676*77c1e3ccSAndroid Build Coastguard Worker store(output + 6 * stride, \ 677*77c1e3ccSAndroid Build Coastguard Worker sub(w35[0], mul(kWeight2, sub(w59[0], w59[1])))); \ 678*77c1e3ccSAndroid Build Coastguard Worker store(output + 7 * stride, \ 679*77c1e3ccSAndroid Build Coastguard Worker sub(w37[0],
sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1])))); \ 680*77c1e3ccSAndroid Build Coastguard Worker store(output + 8 * stride, sub(w30[0], w54[0])); \ 681*77c1e3ccSAndroid Build Coastguard Worker store(output + 9 * stride, \ 682*77c1e3ccSAndroid Build Coastguard Worker add(w32[0], sub(sub(kWeight0, mul(kWeight3, w56[0])), \ 683*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight4, w56[1])))); \ 684*77c1e3ccSAndroid Build Coastguard Worker store(output + 10 * stride, \ 685*77c1e3ccSAndroid Build Coastguard Worker add(w34[0], sub(sub(kWeight0, mul(kWeight2, w58[0])), \ 686*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w58[1])))); \ 687*77c1e3ccSAndroid Build Coastguard Worker store(output + 11 * stride, \ 688*77c1e3ccSAndroid Build Coastguard Worker add(w36[0], sub(sub(kWeight0, mul(kWeight4, w60[0])), \ 689*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight3, w60[1])))); \ 690*77c1e3ccSAndroid Build Coastguard Worker store(output + 12 * stride, sub(w31[0], w55[1])); \ 691*77c1e3ccSAndroid Build Coastguard Worker store(output + 13 * stride, \ 692*77c1e3ccSAndroid Build Coastguard Worker add(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1])))); \ 693*77c1e3ccSAndroid Build Coastguard Worker store(output + 14 * stride, \ 694*77c1e3ccSAndroid Build Coastguard Worker add(w35[0], mul(kWeight2, sub(w59[0], w59[1])))); \ 695*77c1e3ccSAndroid Build Coastguard Worker store(output + 15 * stride, \ 696*77c1e3ccSAndroid Build Coastguard Worker add(w37[0], sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1])))); \ 697*77c1e3ccSAndroid Build Coastguard Worker } 698*77c1e3ccSAndroid Build Coastguard Worker #define GEN_IFFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub, \ 699*77c1e3ccSAndroid Build Coastguard Worker mul) \ 700*77c1e3ccSAndroid Build Coastguard Worker ret aom_ifft1d_32_##suffix(const T *input, T *output, int stride) { \ 701*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight0 = constant(0.0f); \ 702*77c1e3ccSAndroid Build
Coastguard Worker const T_VEC kWeight2 = constant(0.707107f); \ 703*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight3 = constant(0.92388f); \ 704*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight4 = constant(0.382683f); \ 705*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight5 = constant(0.980785f); \ 706*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight6 = constant(0.19509f); \ 707*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight7 = constant(0.83147f); \ 708*77c1e3ccSAndroid Build Coastguard Worker const T_VEC kWeight8 = constant(0.55557f); \ 709*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i0 = load(input + 0 * stride); \ 710*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i1 = load(input + 1 * stride); \ 711*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i2 = load(input + 2 * stride); \ 712*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i3 = load(input + 3 * stride); \ 713*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i4 = load(input + 4 * stride); \ 714*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i5 = load(input + 5 * stride); \ 715*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i6 = load(input + 6 * stride); \ 716*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i7 = load(input + 7 * stride); \ 717*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i8 = load(input + 8 * stride); \ 718*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i9 = load(input + 9 * stride); \ 719*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i10 = load(input + 10 * stride); \ 720*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i11 = load(input + 11 * stride); \ 721*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i12 = load(input + 12 * stride); \ 722*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i13 = load(input + 13 * stride); \ 723*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i14 = load(input + 14 * stride); \ 724*77c1e3ccSAndroid Build 
Coastguard Worker const T_VEC i15 = load(input + 15 * stride); \ 725*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i16 = load(input + 16 * stride); \ 726*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i17 = load(input + 17 * stride); \ 727*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i18 = load(input + 18 * stride); \ 728*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i19 = load(input + 19 * stride); \ 729*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i20 = load(input + 20 * stride); \ 730*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i21 = load(input + 21 * stride); \ 731*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i22 = load(input + 22 * stride); \ 732*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i23 = load(input + 23 * stride); \ 733*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i24 = load(input + 24 * stride); \ 734*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i25 = load(input + 25 * stride); \ 735*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i26 = load(input + 26 * stride); \ 736*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i27 = load(input + 27 * stride); \ 737*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i28 = load(input + 28 * stride); \ 738*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i29 = load(input + 29 * stride); \ 739*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i30 = load(input + 30 * stride); \ 740*77c1e3ccSAndroid Build Coastguard Worker const T_VEC i31 = load(input + 31 * stride); \ 741*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w30 = add(i0, i16); \ 742*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w31 = sub(i0, i16); \ 743*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w32[2] = { add(i8, i8), sub(i24, i24) }; \ 744*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w33[2] = { sub(i8, i8), sub(sub(kWeight0, i24), i24) }; \ 745*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w34[2] = { add(w30, w32[0]), 
w32[1] }; \ 746*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w35[2] = { sub(w30, w32[0]), sub(kWeight0, w32[1]) }; \ 747*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w36[2] = { add(w31, w33[1]), sub(kWeight0, w33[0]) }; \ 748*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w37[2] = { sub(w31, w33[1]), w33[0] }; \ 749*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w38[2] = { add(i4, i12), sub(i28, i20) }; \ 750*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w39[2] = { sub(i4, i12), sub(sub(kWeight0, i20), i28) }; \ 751*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w40[2] = { add(i12, i4), sub(i20, i28) }; \ 752*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w41[2] = { sub(i12, i4), sub(sub(kWeight0, i28), i20) }; \ 753*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) }; \ 754*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) }; \ 755*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) }; \ 756*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) }; \ 757*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w46[2] = { add(w34[0], w42[0]), add(w34[1], w42[1]) }; \ 758*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w47[2] = { sub(w34[0], w42[0]), sub(w34[1], w42[1]) }; \ 759*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w48[2] = { add(w36[0], mul(kWeight2, add(w44[0], w44[1]))), \ 760*77c1e3ccSAndroid Build Coastguard Worker add(w36[1], mul(kWeight2, sub(w44[1], w44[0]))) }; \ 761*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w49[2] = { add(w36[0], \ 762*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w44[0])), \ 763*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w44[1]))), \ 764*77c1e3ccSAndroid Build Coastguard Worker add(w36[1], mul(kWeight2, sub(w44[0], 
w44[1]))) }; \ 765*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w50[2] = { add(w35[0], w43[1]), sub(w35[1], w43[0]) }; \ 766*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w51[2] = { sub(w35[0], w43[1]), add(w35[1], w43[0]) }; \ 767*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w52[2] = { sub(w37[0], mul(kWeight2, sub(w45[0], w45[1]))), \ 768*77c1e3ccSAndroid Build Coastguard Worker sub(w37[1], mul(kWeight2, add(w45[1], w45[0]))) }; \ 769*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w53[2] = { add(w37[0], mul(kWeight2, sub(w45[0], w45[1]))), \ 770*77c1e3ccSAndroid Build Coastguard Worker add(w37[1], mul(kWeight2, add(w45[1], w45[0]))) }; \ 771*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w54[2] = { add(i2, i14), sub(i30, i18) }; \ 772*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w55[2] = { sub(i2, i14), sub(sub(kWeight0, i18), i30) }; \ 773*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w56[2] = { add(i10, i6), sub(i22, i26) }; \ 774*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w57[2] = { sub(i10, i6), sub(sub(kWeight0, i26), i22) }; \ 775*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w58[2] = { add(w54[0], w56[0]), add(w54[1], w56[1]) }; \ 776*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w59[2] = { sub(w54[0], w56[0]), sub(w54[1], w56[1]) }; \ 777*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w60[2] = { add(w55[0], w57[1]), sub(w55[1], w57[0]) }; \ 778*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w61[2] = { sub(w55[0], w57[1]), add(w55[1], w57[0]) }; \ 779*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w62[2] = { add(i6, i10), sub(i26, i22) }; \ 780*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w63[2] = { sub(i6, i10), sub(sub(kWeight0, i22), i26) }; \ 781*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w64[2] = { add(i14, i2), sub(i18, i30) }; \ 782*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w65[2] = { sub(i14, i2), sub(sub(kWeight0, i30), i18) }; 
\ 783*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w66[2] = { add(w62[0], w64[0]), add(w62[1], w64[1]) }; \ 784*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w67[2] = { sub(w62[0], w64[0]), sub(w62[1], w64[1]) }; \ 785*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w68[2] = { add(w63[0], w65[1]), sub(w63[1], w65[0]) }; \ 786*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w69[2] = { sub(w63[0], w65[1]), add(w63[1], w65[0]) }; \ 787*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w70[2] = { add(w58[0], w66[0]), add(w58[1], w66[1]) }; \ 788*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w71[2] = { sub(w58[0], w66[0]), sub(w58[1], w66[1]) }; \ 789*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w72[2] = { add(w60[0], mul(kWeight2, add(w68[0], w68[1]))), \ 790*77c1e3ccSAndroid Build Coastguard Worker add(w60[1], mul(kWeight2, sub(w68[1], w68[0]))) }; \ 791*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w73[2] = { add(w60[0], \ 792*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w68[0])), \ 793*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w68[1]))), \ 794*77c1e3ccSAndroid Build Coastguard Worker add(w60[1], mul(kWeight2, sub(w68[0], w68[1]))) }; \ 795*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w74[2] = { add(w59[0], w67[1]), sub(w59[1], w67[0]) }; \ 796*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w75[2] = { sub(w59[0], w67[1]), add(w59[1], w67[0]) }; \ 797*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w76[2] = { sub(w61[0], mul(kWeight2, sub(w69[0], w69[1]))), \ 798*77c1e3ccSAndroid Build Coastguard Worker sub(w61[1], mul(kWeight2, add(w69[1], w69[0]))) }; \ 799*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w77[2] = { add(w61[0], mul(kWeight2, sub(w69[0], w69[1]))), \ 800*77c1e3ccSAndroid Build Coastguard Worker add(w61[1], mul(kWeight2, add(w69[1], w69[0]))) }; \ 801*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w78[2] = { add(w46[0], w70[0]), 
add(w46[1], w70[1]) }; \ 802*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w79[2] = { sub(w46[0], w70[0]), sub(w46[1], w70[1]) }; \ 803*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w80[2] = { \ 804*77c1e3ccSAndroid Build Coastguard Worker add(w48[0], add(mul(kWeight3, w72[0]), mul(kWeight4, w72[1]))), \ 805*77c1e3ccSAndroid Build Coastguard Worker add(w48[1], sub(mul(kWeight3, w72[1]), mul(kWeight4, w72[0]))) \ 806*77c1e3ccSAndroid Build Coastguard Worker }; \ 807*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w81[2] = { \ 808*77c1e3ccSAndroid Build Coastguard Worker add(w48[0], \ 809*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w72[0])), mul(kWeight4, w72[1]))), \ 810*77c1e3ccSAndroid Build Coastguard Worker add(w48[1], sub(mul(kWeight4, w72[0]), mul(kWeight3, w72[1]))) \ 811*77c1e3ccSAndroid Build Coastguard Worker }; \ 812*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w82[2] = { add(w50[0], mul(kWeight2, add(w74[0], w74[1]))), \ 813*77c1e3ccSAndroid Build Coastguard Worker add(w50[1], mul(kWeight2, sub(w74[1], w74[0]))) }; \ 814*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w83[2] = { add(w50[0], \ 815*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w74[0])), \ 816*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w74[1]))), \ 817*77c1e3ccSAndroid Build Coastguard Worker add(w50[1], mul(kWeight2, sub(w74[0], w74[1]))) }; \ 818*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w84[2] = { \ 819*77c1e3ccSAndroid Build Coastguard Worker add(w52[0], add(mul(kWeight4, w76[0]), mul(kWeight3, w76[1]))), \ 820*77c1e3ccSAndroid Build Coastguard Worker add(w52[1], sub(mul(kWeight4, w76[1]), mul(kWeight3, w76[0]))) \ 821*77c1e3ccSAndroid Build Coastguard Worker }; \ 822*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w85[2] = { \ 823*77c1e3ccSAndroid Build Coastguard Worker add(w52[0], \ 824*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w76[0])), 
mul(kWeight3, w76[1]))), \ 825*77c1e3ccSAndroid Build Coastguard Worker add(w52[1], sub(mul(kWeight3, w76[0]), mul(kWeight4, w76[1]))) \ 826*77c1e3ccSAndroid Build Coastguard Worker }; \ 827*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w86[2] = { add(w47[0], w71[1]), sub(w47[1], w71[0]) }; \ 828*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w87[2] = { sub(w47[0], w71[1]), add(w47[1], w71[0]) }; \ 829*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w88[2] = { \ 830*77c1e3ccSAndroid Build Coastguard Worker sub(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))), \ 831*77c1e3ccSAndroid Build Coastguard Worker add(w49[1], \ 832*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w73[1])), mul(kWeight3, w73[0]))) \ 833*77c1e3ccSAndroid Build Coastguard Worker }; \ 834*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w89[2] = { \ 835*77c1e3ccSAndroid Build Coastguard Worker add(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))), \ 836*77c1e3ccSAndroid Build Coastguard Worker add(w49[1], add(mul(kWeight4, w73[1]), mul(kWeight3, w73[0]))) \ 837*77c1e3ccSAndroid Build Coastguard Worker }; \ 838*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w90[2] = { sub(w51[0], mul(kWeight2, sub(w75[0], w75[1]))), \ 839*77c1e3ccSAndroid Build Coastguard Worker sub(w51[1], mul(kWeight2, add(w75[1], w75[0]))) }; \ 840*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w91[2] = { add(w51[0], mul(kWeight2, sub(w75[0], w75[1]))), \ 841*77c1e3ccSAndroid Build Coastguard Worker add(w51[1], mul(kWeight2, add(w75[1], w75[0]))) }; \ 842*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w92[2] = { \ 843*77c1e3ccSAndroid Build Coastguard Worker sub(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))), \ 844*77c1e3ccSAndroid Build Coastguard Worker add(w53[1], \ 845*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w77[1])), mul(kWeight4, w77[0]))) \ 846*77c1e3ccSAndroid Build Coastguard Worker }; \ 
847*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w93[2] = { \ 848*77c1e3ccSAndroid Build Coastguard Worker add(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))), \ 849*77c1e3ccSAndroid Build Coastguard Worker add(w53[1], add(mul(kWeight3, w77[1]), mul(kWeight4, w77[0]))) \ 850*77c1e3ccSAndroid Build Coastguard Worker }; \ 851*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w94[2] = { add(i1, i15), sub(i31, i17) }; \ 852*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w95[2] = { sub(i1, i15), sub(sub(kWeight0, i17), i31) }; \ 853*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w96[2] = { add(i9, i7), sub(i23, i25) }; \ 854*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w97[2] = { sub(i9, i7), sub(sub(kWeight0, i25), i23) }; \ 855*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w98[2] = { add(w94[0], w96[0]), add(w94[1], w96[1]) }; \ 856*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w99[2] = { sub(w94[0], w96[0]), sub(w94[1], w96[1]) }; \ 857*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w100[2] = { add(w95[0], w97[1]), sub(w95[1], w97[0]) }; \ 858*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w101[2] = { sub(w95[0], w97[1]), add(w95[1], w97[0]) }; \ 859*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w102[2] = { add(i5, i11), sub(i27, i21) }; \ 860*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w103[2] = { sub(i5, i11), sub(sub(kWeight0, i21), i27) }; \ 861*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w104[2] = { add(i13, i3), sub(i19, i29) }; \ 862*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w105[2] = { sub(i13, i3), sub(sub(kWeight0, i29), i19) }; \ 863*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w106[2] = { add(w102[0], w104[0]), add(w102[1], w104[1]) }; \ 864*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w107[2] = { sub(w102[0], w104[0]), sub(w102[1], w104[1]) }; \ 865*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w108[2] = { add(w103[0], w105[1]), 
sub(w103[1], w105[0]) }; \ 866*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w109[2] = { sub(w103[0], w105[1]), add(w103[1], w105[0]) }; \ 867*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w110[2] = { add(w98[0], w106[0]), add(w98[1], w106[1]) }; \ 868*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w111[2] = { sub(w98[0], w106[0]), sub(w98[1], w106[1]) }; \ 869*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w112[2] = { \ 870*77c1e3ccSAndroid Build Coastguard Worker add(w100[0], mul(kWeight2, add(w108[0], w108[1]))), \ 871*77c1e3ccSAndroid Build Coastguard Worker add(w100[1], mul(kWeight2, sub(w108[1], w108[0]))) \ 872*77c1e3ccSAndroid Build Coastguard Worker }; \ 873*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w113[2] = { \ 874*77c1e3ccSAndroid Build Coastguard Worker add(w100[0], \ 875*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w108[0])), mul(kWeight2, w108[1]))), \ 876*77c1e3ccSAndroid Build Coastguard Worker add(w100[1], mul(kWeight2, sub(w108[0], w108[1]))) \ 877*77c1e3ccSAndroid Build Coastguard Worker }; \ 878*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w114[2] = { add(w99[0], w107[1]), sub(w99[1], w107[0]) }; \ 879*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w115[2] = { sub(w99[0], w107[1]), add(w99[1], w107[0]) }; \ 880*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w116[2] = { \ 881*77c1e3ccSAndroid Build Coastguard Worker sub(w101[0], mul(kWeight2, sub(w109[0], w109[1]))), \ 882*77c1e3ccSAndroid Build Coastguard Worker sub(w101[1], mul(kWeight2, add(w109[1], w109[0]))) \ 883*77c1e3ccSAndroid Build Coastguard Worker }; \ 884*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w117[2] = { \ 885*77c1e3ccSAndroid Build Coastguard Worker add(w101[0], mul(kWeight2, sub(w109[0], w109[1]))), \ 886*77c1e3ccSAndroid Build Coastguard Worker add(w101[1], mul(kWeight2, add(w109[1], w109[0]))) \ 887*77c1e3ccSAndroid Build Coastguard Worker }; \ 888*77c1e3ccSAndroid Build 
Coastguard Worker const T_VEC w118[2] = { add(i3, i13), sub(i29, i19) }; \ 889*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w119[2] = { sub(i3, i13), sub(sub(kWeight0, i19), i29) }; \ 890*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w120[2] = { add(i11, i5), sub(i21, i27) }; \ 891*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w121[2] = { sub(i11, i5), sub(sub(kWeight0, i27), i21) }; \ 892*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w122[2] = { add(w118[0], w120[0]), add(w118[1], w120[1]) }; \ 893*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w123[2] = { sub(w118[0], w120[0]), sub(w118[1], w120[1]) }; \ 894*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w124[2] = { add(w119[0], w121[1]), sub(w119[1], w121[0]) }; \ 895*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w125[2] = { sub(w119[0], w121[1]), add(w119[1], w121[0]) }; \ 896*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w126[2] = { add(i7, i9), sub(i25, i23) }; \ 897*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w127[2] = { sub(i7, i9), sub(sub(kWeight0, i23), i25) }; \ 898*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w128[2] = { add(i15, i1), sub(i17, i31) }; \ 899*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w129[2] = { sub(i15, i1), sub(sub(kWeight0, i31), i17) }; \ 900*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w130[2] = { add(w126[0], w128[0]), add(w126[1], w128[1]) }; \ 901*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w131[2] = { sub(w126[0], w128[0]), sub(w126[1], w128[1]) }; \ 902*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w132[2] = { add(w127[0], w129[1]), sub(w127[1], w129[0]) }; \ 903*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w133[2] = { sub(w127[0], w129[1]), add(w127[1], w129[0]) }; \ 904*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w134[2] = { add(w122[0], w130[0]), add(w122[1], w130[1]) }; \ 905*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w135[2] = { sub(w122[0], 
w130[0]), sub(w122[1], w130[1]) }; \ 906*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w136[2] = { \ 907*77c1e3ccSAndroid Build Coastguard Worker add(w124[0], mul(kWeight2, add(w132[0], w132[1]))), \ 908*77c1e3ccSAndroid Build Coastguard Worker add(w124[1], mul(kWeight2, sub(w132[1], w132[0]))) \ 909*77c1e3ccSAndroid Build Coastguard Worker }; \ 910*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w137[2] = { \ 911*77c1e3ccSAndroid Build Coastguard Worker add(w124[0], \ 912*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w132[0])), mul(kWeight2, w132[1]))), \ 913*77c1e3ccSAndroid Build Coastguard Worker add(w124[1], mul(kWeight2, sub(w132[0], w132[1]))) \ 914*77c1e3ccSAndroid Build Coastguard Worker }; \ 915*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w138[2] = { add(w123[0], w131[1]), sub(w123[1], w131[0]) }; \ 916*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w139[2] = { sub(w123[0], w131[1]), add(w123[1], w131[0]) }; \ 917*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w140[2] = { \ 918*77c1e3ccSAndroid Build Coastguard Worker sub(w125[0], mul(kWeight2, sub(w133[0], w133[1]))), \ 919*77c1e3ccSAndroid Build Coastguard Worker sub(w125[1], mul(kWeight2, add(w133[1], w133[0]))) \ 920*77c1e3ccSAndroid Build Coastguard Worker }; \ 921*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w141[2] = { \ 922*77c1e3ccSAndroid Build Coastguard Worker add(w125[0], mul(kWeight2, sub(w133[0], w133[1]))), \ 923*77c1e3ccSAndroid Build Coastguard Worker add(w125[1], mul(kWeight2, add(w133[1], w133[0]))) \ 924*77c1e3ccSAndroid Build Coastguard Worker }; \ 925*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w142[2] = { add(w110[0], w134[0]), add(w110[1], w134[1]) }; \ 926*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w143[2] = { sub(w110[0], w134[0]), sub(w110[1], w134[1]) }; \ 927*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w144[2] = { \ 928*77c1e3ccSAndroid Build Coastguard Worker add(w112[0], 
add(mul(kWeight3, w136[0]), mul(kWeight4, w136[1]))), \ 929*77c1e3ccSAndroid Build Coastguard Worker add(w112[1], sub(mul(kWeight3, w136[1]), mul(kWeight4, w136[0]))) \ 930*77c1e3ccSAndroid Build Coastguard Worker }; \ 931*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w145[2] = { \ 932*77c1e3ccSAndroid Build Coastguard Worker add(w112[0], \ 933*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w136[0])), mul(kWeight4, w136[1]))), \ 934*77c1e3ccSAndroid Build Coastguard Worker add(w112[1], sub(mul(kWeight4, w136[0]), mul(kWeight3, w136[1]))) \ 935*77c1e3ccSAndroid Build Coastguard Worker }; \ 936*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w146[2] = { \ 937*77c1e3ccSAndroid Build Coastguard Worker add(w114[0], mul(kWeight2, add(w138[0], w138[1]))), \ 938*77c1e3ccSAndroid Build Coastguard Worker add(w114[1], mul(kWeight2, sub(w138[1], w138[0]))) \ 939*77c1e3ccSAndroid Build Coastguard Worker }; \ 940*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w147[2] = { \ 941*77c1e3ccSAndroid Build Coastguard Worker add(w114[0], \ 942*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight2, w138[0])), mul(kWeight2, w138[1]))), \ 943*77c1e3ccSAndroid Build Coastguard Worker add(w114[1], mul(kWeight2, sub(w138[0], w138[1]))) \ 944*77c1e3ccSAndroid Build Coastguard Worker }; \ 945*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w148[2] = { \ 946*77c1e3ccSAndroid Build Coastguard Worker add(w116[0], add(mul(kWeight4, w140[0]), mul(kWeight3, w140[1]))), \ 947*77c1e3ccSAndroid Build Coastguard Worker add(w116[1], sub(mul(kWeight4, w140[1]), mul(kWeight3, w140[0]))) \ 948*77c1e3ccSAndroid Build Coastguard Worker }; \ 949*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w149[2] = { \ 950*77c1e3ccSAndroid Build Coastguard Worker add(w116[0], \ 951*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w140[0])), mul(kWeight3, w140[1]))), \ 952*77c1e3ccSAndroid Build Coastguard Worker add(w116[1], 
sub(mul(kWeight3, w140[0]), mul(kWeight4, w140[1]))) \ 953*77c1e3ccSAndroid Build Coastguard Worker }; \ 954*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w150[2] = { add(w111[0], w135[1]), sub(w111[1], w135[0]) }; \ 955*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w151[2] = { sub(w111[0], w135[1]), add(w111[1], w135[0]) }; \ 956*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w152[2] = { \ 957*77c1e3ccSAndroid Build Coastguard Worker sub(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))), \ 958*77c1e3ccSAndroid Build Coastguard Worker add(w113[1], \ 959*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight4, w137[1])), mul(kWeight3, w137[0]))) \ 960*77c1e3ccSAndroid Build Coastguard Worker }; \ 961*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w153[2] = { \ 962*77c1e3ccSAndroid Build Coastguard Worker add(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))), \ 963*77c1e3ccSAndroid Build Coastguard Worker add(w113[1], add(mul(kWeight4, w137[1]), mul(kWeight3, w137[0]))) \ 964*77c1e3ccSAndroid Build Coastguard Worker }; \ 965*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w154[2] = { \ 966*77c1e3ccSAndroid Build Coastguard Worker sub(w115[0], mul(kWeight2, sub(w139[0], w139[1]))), \ 967*77c1e3ccSAndroid Build Coastguard Worker sub(w115[1], mul(kWeight2, add(w139[1], w139[0]))) \ 968*77c1e3ccSAndroid Build Coastguard Worker }; \ 969*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w155[2] = { \ 970*77c1e3ccSAndroid Build Coastguard Worker add(w115[0], mul(kWeight2, sub(w139[0], w139[1]))), \ 971*77c1e3ccSAndroid Build Coastguard Worker add(w115[1], mul(kWeight2, add(w139[1], w139[0]))) \ 972*77c1e3ccSAndroid Build Coastguard Worker }; \ 973*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w156[2] = { \ 974*77c1e3ccSAndroid Build Coastguard Worker sub(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))), \ 975*77c1e3ccSAndroid Build Coastguard Worker add(w117[1], \ 
976*77c1e3ccSAndroid Build Coastguard Worker sub(sub(kWeight0, mul(kWeight3, w141[1])), mul(kWeight4, w141[0]))) \ 977*77c1e3ccSAndroid Build Coastguard Worker }; \ 978*77c1e3ccSAndroid Build Coastguard Worker const T_VEC w157[2] = { \ 979*77c1e3ccSAndroid Build Coastguard Worker add(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))), \ 980*77c1e3ccSAndroid Build Coastguard Worker add(w117[1], add(mul(kWeight3, w141[1]), mul(kWeight4, w141[0]))) \ 981*77c1e3ccSAndroid Build Coastguard Worker }; \ 982*77c1e3ccSAndroid Build Coastguard Worker store(output + 0 * stride, add(w78[0], w142[0])); \ 983*77c1e3ccSAndroid Build Coastguard Worker store(output + 1 * stride, \ 984*77c1e3ccSAndroid Build Coastguard Worker add(w80[0], add(mul(kWeight5, w144[0]), mul(kWeight6, w144[1])))); \ 985*77c1e3ccSAndroid Build Coastguard Worker store(output + 2 * stride, \ 986*77c1e3ccSAndroid Build Coastguard Worker add(w82[0], add(mul(kWeight3, w146[0]), mul(kWeight4, w146[1])))); \ 987*77c1e3ccSAndroid Build Coastguard Worker store(output + 3 * stride, \ 988*77c1e3ccSAndroid Build Coastguard Worker add(w84[0], add(mul(kWeight7, w148[0]), mul(kWeight8, w148[1])))); \ 989*77c1e3ccSAndroid Build Coastguard Worker store(output + 4 * stride, \ 990*77c1e3ccSAndroid Build Coastguard Worker add(w86[0], mul(kWeight2, add(w150[0], w150[1])))); \ 991*77c1e3ccSAndroid Build Coastguard Worker store(output + 5 * stride, \ 992*77c1e3ccSAndroid Build Coastguard Worker add(w88[0], add(mul(kWeight8, w152[0]), mul(kWeight7, w152[1])))); \ 993*77c1e3ccSAndroid Build Coastguard Worker store(output + 6 * stride, \ 994*77c1e3ccSAndroid Build Coastguard Worker add(w90[0], add(mul(kWeight4, w154[0]), mul(kWeight3, w154[1])))); \ 995*77c1e3ccSAndroid Build Coastguard Worker store(output + 7 * stride, \ 996*77c1e3ccSAndroid Build Coastguard Worker add(w92[0], add(mul(kWeight6, w156[0]), mul(kWeight5, w156[1])))); \ 997*77c1e3ccSAndroid Build Coastguard Worker store(output + 8 * stride, add(w79[0], 
w143[1])); \ 998*77c1e3ccSAndroid Build Coastguard Worker store(output + 9 * stride, \ 999*77c1e3ccSAndroid Build Coastguard Worker sub(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1])))); \ 1000*77c1e3ccSAndroid Build Coastguard Worker store(output + 10 * stride, \ 1001*77c1e3ccSAndroid Build Coastguard Worker sub(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1])))); \ 1002*77c1e3ccSAndroid Build Coastguard Worker store(output + 11 * stride, \ 1003*77c1e3ccSAndroid Build Coastguard Worker sub(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1])))); \ 1004*77c1e3ccSAndroid Build Coastguard Worker store(output + 12 * stride, \ 1005*77c1e3ccSAndroid Build Coastguard Worker sub(w87[0], mul(kWeight2, sub(w151[0], w151[1])))); \ 1006*77c1e3ccSAndroid Build Coastguard Worker store(output + 13 * stride, \ 1007*77c1e3ccSAndroid Build Coastguard Worker sub(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1])))); \ 1008*77c1e3ccSAndroid Build Coastguard Worker store(output + 14 * stride, \ 1009*77c1e3ccSAndroid Build Coastguard Worker sub(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1])))); \ 1010*77c1e3ccSAndroid Build Coastguard Worker store(output + 15 * stride, \ 1011*77c1e3ccSAndroid Build Coastguard Worker sub(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1])))); \ 1012*77c1e3ccSAndroid Build Coastguard Worker store(output + 16 * stride, sub(w78[0], w142[0])); \ 1013*77c1e3ccSAndroid Build Coastguard Worker store(output + 17 * stride, \ 1014*77c1e3ccSAndroid Build Coastguard Worker add(w80[0], sub(sub(kWeight0, mul(kWeight5, w144[0])), \ 1015*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight6, w144[1])))); \ 1016*77c1e3ccSAndroid Build Coastguard Worker store(output + 18 * stride, \ 1017*77c1e3ccSAndroid Build Coastguard Worker add(w82[0], sub(sub(kWeight0, mul(kWeight3, w146[0])), \ 1018*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight4, w146[1])))); \ 1019*77c1e3ccSAndroid Build Coastguard Worker store(output + 
19 * stride, \ 1020*77c1e3ccSAndroid Build Coastguard Worker add(w84[0], sub(sub(kWeight0, mul(kWeight7, w148[0])), \ 1021*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight8, w148[1])))); \ 1022*77c1e3ccSAndroid Build Coastguard Worker store(output + 20 * stride, \ 1023*77c1e3ccSAndroid Build Coastguard Worker add(w86[0], sub(sub(kWeight0, mul(kWeight2, w150[0])), \ 1024*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight2, w150[1])))); \ 1025*77c1e3ccSAndroid Build Coastguard Worker store(output + 21 * stride, \ 1026*77c1e3ccSAndroid Build Coastguard Worker add(w88[0], sub(sub(kWeight0, mul(kWeight8, w152[0])), \ 1027*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight7, w152[1])))); \ 1028*77c1e3ccSAndroid Build Coastguard Worker store(output + 22 * stride, \ 1029*77c1e3ccSAndroid Build Coastguard Worker add(w90[0], sub(sub(kWeight0, mul(kWeight4, w154[0])), \ 1030*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight3, w154[1])))); \ 1031*77c1e3ccSAndroid Build Coastguard Worker store(output + 23 * stride, \ 1032*77c1e3ccSAndroid Build Coastguard Worker add(w92[0], sub(sub(kWeight0, mul(kWeight6, w156[0])), \ 1033*77c1e3ccSAndroid Build Coastguard Worker mul(kWeight5, w156[1])))); \ 1034*77c1e3ccSAndroid Build Coastguard Worker store(output + 24 * stride, sub(w79[0], w143[1])); \ 1035*77c1e3ccSAndroid Build Coastguard Worker store(output + 25 * stride, \ 1036*77c1e3ccSAndroid Build Coastguard Worker add(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1])))); \ 1037*77c1e3ccSAndroid Build Coastguard Worker store(output + 26 * stride, \ 1038*77c1e3ccSAndroid Build Coastguard Worker add(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1])))); \ 1039*77c1e3ccSAndroid Build Coastguard Worker store(output + 27 * stride, \ 1040*77c1e3ccSAndroid Build Coastguard Worker add(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1])))); \ 1041*77c1e3ccSAndroid Build Coastguard Worker store(output + 28 * stride, \ 1042*77c1e3ccSAndroid Build Coastguard 
Worker add(w87[0], mul(kWeight2, sub(w151[0], w151[1])))); \ 1043*77c1e3ccSAndroid Build Coastguard Worker store(output + 29 * stride, \ 1044*77c1e3ccSAndroid Build Coastguard Worker add(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1])))); \ 1045*77c1e3ccSAndroid Build Coastguard Worker store(output + 30 * stride, \ 1046*77c1e3ccSAndroid Build Coastguard Worker add(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1])))); \ 1047*77c1e3ccSAndroid Build Coastguard Worker store(output + 31 * stride, \ 1048*77c1e3ccSAndroid Build Coastguard Worker add(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1])))); \ 1049*77c1e3ccSAndroid Build Coastguard Worker } 1050*77c1e3ccSAndroid Build Coastguard Worker 1051*77c1e3ccSAndroid Build Coastguard Worker #endif // AOM_AOM_DSP_FFT_COMMON_H_ 1052