1*a58d3d2aSXin Li /* Copyright (c) 2015 Xiph.Org Foundation
2*a58d3d2aSXin Li Written by Viswanath Puttagunta */
3*a58d3d2aSXin Li /**
4*a58d3d2aSXin Li @file celt_fft_ne10.c
5*a58d3d2aSXin Li @brief ARM Neon optimizations for fft using NE10 library
6*a58d3d2aSXin Li */
7*a58d3d2aSXin Li
8*a58d3d2aSXin Li /*
9*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without
10*a58d3d2aSXin Li modification, are permitted provided that the following conditions
11*a58d3d2aSXin Li are met:
12*a58d3d2aSXin Li
13*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright
14*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer.
15*a58d3d2aSXin Li
16*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright
17*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the
18*a58d3d2aSXin Li documentation and/or other materials provided with the distribution.
19*a58d3d2aSXin Li
20*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24*a58d3d2aSXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*a58d3d2aSXin Li */
32*a58d3d2aSXin Li
33*a58d3d2aSXin Li #ifndef SKIP_CONFIG_H
34*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
35*a58d3d2aSXin Li #include "config.h"
36*a58d3d2aSXin Li #endif
37*a58d3d2aSXin Li #endif
38*a58d3d2aSXin Li
39*a58d3d2aSXin Li #include <NE10_dsp.h>
40*a58d3d2aSXin Li #include "os_support.h"
41*a58d3d2aSXin Li #include "kiss_fft.h"
42*a58d3d2aSXin Li #include "stack_alloc.h"
43*a58d3d2aSXin Li
/* Type-generic aliases for the NE10 complex FFT API.
   Floating-point builds map onto the float32 flavour, FIXED_POINT builds onto
   the int32 flavour.  Every alias is an object-like macro in both branches so
   the two configurations are used with exactly the same syntax. */
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_int32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
60*a58d3d2aSXin Li
61*a58d3d2aSXin Li #if defined(CUSTOM_MODES)
62*a58d3d2aSXin Li
/* FFT lengths (nfft) for which NE10 offers a scaled transform.
   NE10_FFTSCALED_SUPPORT_MAX is the number of entries in the table. */
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
   480,
   240,
   120,
   60
};
68*a58d3d2aSXin Li
/**
 * Allocate the NE10-specific part of an FFT state.
 *
 * Allocates st->arch_fft.  When st->nfft matches an entry of
 * ne10_fft_scaled_support, an NE10 complex FFT configuration is also
 * allocated and stored in st->arch_fft->priv, and is_supported is set to 1.
 * For any other length is_supported is 0 and priv is NULL.
 *
 * @param st  FFT state to extend; st->nfft is read, st->arch_fft is written.
 * @return 0 on success, -1 if an allocation failed.  On failure
 *         st->arch_fft is NULL, so no half-initialised state is left behind;
 *         opus_fft_free_arm_neon() accepts that and does nothing.
 */
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
   int i;
   int supported = 0;
   arch_fft_state *arch;

   arch = (arch_fft_state *)opus_alloc(sizeof(*arch));
   st->arch_fft = arch;
   if (arch == NULL)
      return -1;

   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
      if (st->nfft == ne10_fft_scaled_support[i]) {
         supported = 1;
         break;
      }
   }

   arch->is_supported = supported;
   arch->priv = NULL;
   if (!supported) {
      /* This nfft length (scaled fft) is not supported in NE10 */
      return 0;
   }

   arch->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
   if (arch->priv == NULL) {
      /* Never leave a state that advertises NE10 support without a config:
         release the wrapper and clear the pointer before reporting failure. */
      opus_free(arch);
      st->arch_fft = NULL;
      return -1;
   }
   return 0;
}
96*a58d3d2aSXin Li
/**
 * Release everything opus_fft_alloc_arm_neon() attached to an FFT state.
 *
 * Destroys the NE10 configuration held in st->arch_fft->priv (if any), frees
 * st->arch_fft and resets it to NULL so that calling this function again on
 * the same state is a no-op instead of a double free.
 *
 * @param st  FFT state; a NULL st->arch_fft is accepted and ignored.
 */
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
   arch_fft_state *arch = st->arch_fft;

   if (arch == NULL)
      return;

   if (arch->priv != NULL)
      NE10_FFT_DESTROY_C2C_TYPE((NE10_FFT_CFG_TYPE_T)arch->priv);

   opus_free(arch);
   /* Do not leave a dangling pointer behind. */
   st->arch_fft = NULL;
}
109*a58d3d2aSXin Li #endif
110*a58d3d2aSXin Li
/**
 * Forward complex FFT, using NE10 when the length is supported.
 *
 * If st->arch_fft->is_supported is 0 the call is forwarded to opus_fft_c().
 * Otherwise the NE10 configuration stored in st->arch_fft->priv is copied
 * into a local state, a per-call scratch buffer of st->nfft complex values is
 * attached to that copy, and the NE10 transform is run on it.  The shared
 * configuration reachable through the const st is therefore never modified.
 *
 * @param st    FFT state; st->arch_fft must be non-NULL.
 * @param fin   Input samples, handed to NE10 as NE10_FFT_CPX_TYPE_T
 *              (assumes kiss_fft_cpx has the same layout -- TODO confirm).
 * @param fout  Output samples, same layout assumption as fin.
 */
void opus_fft_neon(const kiss_fft_state *st,
                   const kiss_fft_cpx *fin,
                   kiss_fft_cpx *fout)
{
   NE10_FFT_STATE_TYPE_T state;
   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
   SAVE_STACK;

   if (!st->arch_fft->is_supported) {
      /* This nfft length (scaled fft) not supported in NE10 */
      opus_fft_c(st, fin, fout);
   }
   else {
      /* Only NE10 needs the scratch space, so reserve it on this path only
         and keep the C fallback free of the extra stack usage. */
      ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);

      memcpy(&state, st->arch_fft->priv, sizeof(state));
      state.buffer = &buffer[0];
#if !defined(FIXED_POINT)
      /* Float API: scaling is requested through the state, and the last
         argument selects the direction (0 here; presumably "forward" per the
         NE10 API -- confirm against NE10 docs). */
      state.is_forward_scaled = 1;

      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
                                (NE10_FFT_CPX_TYPE_T *)fin,
                                &state, 0);
#else
      /* Fixed-point API: direction (0) and scaling flag (1) are both passed
         as arguments. */
      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
                                (NE10_FFT_CPX_TYPE_T *)fin,
                                &state, 0, 1);
#endif
   }
   RESTORE_STACK;
}
142*a58d3d2aSXin Li
/**
 * Inverse complex FFT, using NE10 when the length is supported.
 *
 * If st->arch_fft->is_supported is 0 the call is forwarded to opus_ifft_c().
 * Otherwise the NE10 configuration stored in st->arch_fft->priv is copied
 * into a local state, a per-call scratch buffer of st->nfft complex values is
 * attached to that copy, and the NE10 transform is run on it.  The shared
 * configuration reachable through the const st is therefore never modified.
 *
 * @param st    FFT state; st->arch_fft must be non-NULL.
 * @param fin   Input samples, handed to NE10 as NE10_FFT_CPX_TYPE_T
 *              (assumes kiss_fft_cpx has the same layout -- TODO confirm).
 * @param fout  Output samples, same layout assumption as fin.
 */
void opus_ifft_neon(const kiss_fft_state *st,
                    const kiss_fft_cpx *fin,
                    kiss_fft_cpx *fout)
{
   NE10_FFT_STATE_TYPE_T state;
   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
   SAVE_STACK;

   if (!st->arch_fft->is_supported) {
      /* This nfft length (scaled fft) not supported in NE10 */
      opus_ifft_c(st, fin, fout);
   }
   else {
      /* Only NE10 needs the scratch space, so reserve it on this path only
         and keep the C fallback free of the extra stack usage. */
      ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);

      memcpy(&state, st->arch_fft->priv, sizeof(state));
      state.buffer = &buffer[0];
#if !defined(FIXED_POINT)
      /* Float API: backward scaling is disabled through the state, and the
         last argument selects the direction (1 here; presumably "inverse" per
         the NE10 API -- confirm against NE10 docs). */
      state.is_backward_scaled = 0;

      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
                                (NE10_FFT_CPX_TYPE_T *)fin,
                                &state, 1);
#else
      /* Fixed-point API: direction (1) and scaling flag (0) are both passed
         as arguments. */
      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
                                (NE10_FFT_CPX_TYPE_T *)fin,
                                &state, 1, 0);
#endif
   }
   RESTORE_STACK;
}
174