xref: /aosp_15_r20/external/libopus/celt/arm/celt_fft_ne10.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
/* Copyright (c) 2015 Xiph.Org Foundation
   Written by Viswanath Puttagunta */
/**
   @file celt_fft_ne10.c
   @brief ARM Neon optimizations for fft using NE10 library
 */
7*a58d3d2aSXin Li 
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
32*a58d3d2aSXin Li 
33*a58d3d2aSXin Li #ifndef SKIP_CONFIG_H
34*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
35*a58d3d2aSXin Li #include "config.h"
36*a58d3d2aSXin Li #endif
37*a58d3d2aSXin Li #endif
38*a58d3d2aSXin Li 
39*a58d3d2aSXin Li #include <NE10_dsp.h>
40*a58d3d2aSXin Li #include "os_support.h"
41*a58d3d2aSXin Li #include "kiss_fft.h"
42*a58d3d2aSXin Li #include "stack_alloc.h"
43*a58d3d2aSXin Li 
/* Map the generic NE10_FFT_* names used in this file onto the float32 or the
   int32 flavour of the NE10 complex-to-complex FFT API, depending on whether
   FIXED_POINT is defined.  Every alias is a plain object-like macro in both
   branches, and each name is defined exactly once per branch. */
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_int32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
60*a58d3d2aSXin Li 
61*a58d3d2aSXin Li #if defined(CUSTOM_MODES)
62*a58d3d2aSXin Li 
/* nfft lengths in NE10 that support scaled fft */
static const int ne10_fft_scaled_support[] = {
   480, 240, 120, 60
};

/* Number of entries in ne10_fft_scaled_support.  Derived from the table
   itself so the count cannot get out of step when lengths are added or
   removed; cast to int because it is compared against an int loop index. */
# define NE10_FFTSCALED_SUPPORT_MAX \
   ((int)(sizeof(ne10_fft_scaled_support) / sizeof(ne10_fft_scaled_support[0])))
68*a58d3d2aSXin Li 
/**
   Allocate the NE10-specific part of an FFT state.

   Stores a freshly allocated arch_fft_state in st->arch_fft.  If st->nfft is
   one of the lengths listed in ne10_fft_scaled_support, an NE10 configuration
   is allocated as well and stored in st->arch_fft->priv with is_supported set
   to 1.  For any other length is_supported is 0 and priv is NULL.

   @param st  FFT state to extend; st->nfft is read, st->arch_fft is written.
   @return 0 on success, -1 if either allocation fails.  On failure
           st->arch_fft is NULL and nothing allocated here is left behind.
 */
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
   int i;
   size_t memneeded = sizeof(struct arch_fft_state);

   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
   if (!st->arch_fft)
      return -1;

   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
      if(st->nfft == ne10_fft_scaled_support[i])
         break;
   }
   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
      /* This nfft length (scaled fft) is not supported in NE10 */
      st->arch_fft->is_supported = 0;
      st->arch_fft->priv = NULL;
      return 0;
   }

   st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
   if (st->arch_fft->priv == NULL) {
      /* Release the partially built state and clear the pointer, so the
         failure leaves no allocation behind and a later
         opus_fft_free_arm_neon() on this state is a harmless no-op. */
      opus_free(st->arch_fft);
      st->arch_fft = NULL;
      return -1;
   }
   /* Only advertise NE10 support once the configuration really exists. */
   st->arch_fft->is_supported = 1;
   return 0;
}
96*a58d3d2aSXin Li 
/**
   Release the NE10-specific part of an FFT state.

   Destroys the NE10 configuration held in st->arch_fft->priv (if any), frees
   st->arch_fft itself and resets the pointer to NULL.  Does nothing when
   st->arch_fft is already NULL, so calling it again on the same state is
   safe.

   @param st  FFT state whose arch_fft member is to be released.
 */
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
   NE10_FFT_CFG_TYPE_T cfg;

   if (!st->arch_fft)
      return;

   cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
   if (cfg)
      NE10_FFT_DESTROY_C2C_TYPE(cfg);
   opus_free(st->arch_fft);
   /* Clear the pointer so the NULL guard above protects against a second
      free and nobody can reach the released memory through st. */
   st->arch_fft = NULL;
}
109*a58d3d2aSXin Li #endif
110*a58d3d2aSXin Li 
/**
   Forward complex FFT, using NE10 when the length is supported.

   If st->arch_fft->is_supported is zero the call is forwarded to
   opus_fft_c().  Otherwise the NE10 state stored in st->arch_fft->priv is
   copied into a local state, the local copy is pointed at a stack scratch
   buffer of st->nfft complex values, and the NE10 transform is run with the
   inverse flag set to 0.  Working on the local copy leaves the state
   reachable through st unmodified.

   @param st    FFT state (nfft and arch_fft are read).
   @param fin   Input samples.
   @param fout  Output samples.
 */
void opus_fft_neon(const kiss_fft_state *st,
                   const kiss_fft_cpx *fin,
                   kiss_fft_cpx *fout)
{
   NE10_FFT_STATE_TYPE_T local_state;
   NE10_FFT_CFG_TYPE_T local_cfg = &local_state;
   VARDECL(NE10_FFT_CPX_TYPE_T, scratch);
   SAVE_STACK;
   ALLOC(scratch, st->nfft, NE10_FFT_CPX_TYPE_T);

   if (st->arch_fft->is_supported) {
      NE10_FFT_CPX_TYPE_T *dst = (NE10_FFT_CPX_TYPE_T *)fout;
      NE10_FFT_CPX_TYPE_T *src = (NE10_FFT_CPX_TYPE_T *)fin;

      /* Private copy of the NE10 state with its scratch pointer redirected
         to this call's stack buffer. */
      memcpy((void *)local_cfg, st->arch_fft->priv, sizeof(local_state));
      local_state.buffer = (NE10_FFT_CPX_TYPE_T *)&scratch[0];
#if !defined(FIXED_POINT)
      /* Float build: forward scaling is requested through the state. */
      local_state.is_forward_scaled = 1;
      NE10_FFT_C2C_1D_TYPE_NEON(dst, src, local_cfg, 0);
#else
      /* Fixed-point build: the NE10 call takes one extra trailing argument,
         passed as 1 here. */
      NE10_FFT_C2C_1D_TYPE_NEON(dst, src, local_cfg, 0, 1);
#endif
   }
   else {
      /* This nfft length (scaled fft) not supported in NE10 */
      opus_fft_c(st, fin, fout);
   }
   RESTORE_STACK;
}
142*a58d3d2aSXin Li 
/**
   Inverse complex FFT, using NE10 when the length is supported.

   If st->arch_fft->is_supported is zero the call is forwarded to
   opus_ifft_c().  Otherwise the NE10 state stored in st->arch_fft->priv is
   copied into a local state, the local copy is pointed at a stack scratch
   buffer of st->nfft complex values, and the NE10 transform is run with the
   inverse flag set to 1.  Working on the local copy leaves the state
   reachable through st unmodified.

   @param st    FFT state (nfft and arch_fft are read).
   @param fin   Input samples.
   @param fout  Output samples.
 */
void opus_ifft_neon(const kiss_fft_state *st,
                    const kiss_fft_cpx *fin,
                    kiss_fft_cpx *fout)
{
   NE10_FFT_STATE_TYPE_T local_state;
   NE10_FFT_CFG_TYPE_T local_cfg = &local_state;
   VARDECL(NE10_FFT_CPX_TYPE_T, scratch);
   SAVE_STACK;
   ALLOC(scratch, st->nfft, NE10_FFT_CPX_TYPE_T);

   if (st->arch_fft->is_supported) {
      NE10_FFT_CPX_TYPE_T *dst = (NE10_FFT_CPX_TYPE_T *)fout;
      NE10_FFT_CPX_TYPE_T *src = (NE10_FFT_CPX_TYPE_T *)fin;

      /* Private copy of the NE10 state with its scratch pointer redirected
         to this call's stack buffer. */
      memcpy((void *)local_cfg, st->arch_fft->priv, sizeof(local_state));
      local_state.buffer = (NE10_FFT_CPX_TYPE_T *)&scratch[0];
#if !defined(FIXED_POINT)
      /* Float build: backward scaling is switched off through the state. */
      local_state.is_backward_scaled = 0;
      NE10_FFT_C2C_1D_TYPE_NEON(dst, src, local_cfg, 1);
#else
      /* Fixed-point build: the NE10 call takes one extra trailing argument,
         passed as 0 here. */
      NE10_FFT_C2C_1D_TYPE_NEON(dst, src, local_cfg, 1, 0);
#endif
   }
   else {
      /* This nfft length (scaled fft) not supported in NE10 */
      opus_ifft_c(st, fin, fout);
   }
   RESTORE_STACK;
}
174