xref: /aosp_15_r20/external/libopus/dnn/x86/dnn_x86.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
/* Copyright (c) 2011-2019 Mozilla
                 2023 Amazon */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifndef DNN_X86_H
29*a58d3d2aSXin Li #define DNN_X86_H
30*a58d3d2aSXin Li 
31*a58d3d2aSXin Li #include "cpu_support.h"
32*a58d3d2aSXin Li #include "opus_types.h"
33*a58d3d2aSXin Li 
/*
 * SSE2 variants of the DNN primitives.
 *
 * The prototypes are only visible when OPUS_X86_MAY_HAVE_SSE2 is defined.
 * In every prototype the const-qualified pointers are inputs and the
 * non-const float pointers are writable buffers; buffer sizes are not
 * expressed in this header.
 *
 * NOTE(review): LinearLayer and Conv2dLayer are not declared in this block;
 * presumably they are already in scope where this header is included --
 * confirm.
 */
#if defined(OPUS_X86_MAY_HAVE_SSE2)
void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
void compute_activation_sse2(float *output, const float *input, int N, int activation);
void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in,
                         int height, int hstride, int activation);
#endif
39*a58d3d2aSXin Li 
/*
 * SSE4.1 variants of the DNN primitives.
 *
 * The prototypes are only visible when OPUS_X86_MAY_HAVE_SSE4_1 is defined.
 * In every prototype the const-qualified pointers are inputs and the
 * non-const float pointers are writable buffers; buffer sizes are not
 * expressed in this header.
 *
 * NOTE(review): LinearLayer and Conv2dLayer are not declared in this block;
 * presumably they are already in scope where this header is included --
 * confirm.
 */
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
void compute_activation_sse4_1(float *output, const float *input, int N, int activation);
void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in,
                           int height, int hstride, int activation);
#endif
45*a58d3d2aSXin Li 
/*
 * AVX2 variants of the DNN primitives.
 *
 * The prototypes are only visible when OPUS_X86_MAY_HAVE_AVX2 is defined.
 * In every prototype the const-qualified pointers are inputs and the
 * non-const float pointers are writable buffers; buffer sizes are not
 * expressed in this header.
 *
 * NOTE(review): LinearLayer and Conv2dLayer are not declared in this block;
 * presumably they are already in scope where this header is included --
 * confirm.
 */
#if defined(OPUS_X86_MAY_HAVE_AVX2)
void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
void compute_activation_avx2(float *output, const float *input, int N, int activation);
void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in,
                         int height, int hstride, int activation);
#endif
51*a58d3d2aSXin Li 
52*a58d3d2aSXin Li 
/*
 * Bind the generic names compute_linear(), compute_activation() and
 * compute_conv2d() to an x86 implementation.  At most one branch of the
 * chain below is taken, tested in this order:
 *
 *   1. OPUS_X86_PRESUME_AVX2
 *        -> call the *_avx2 functions directly.
 *   2. OPUS_X86_PRESUME_SSE4_1, and OPUS_X86_MAY_HAVE_AVX2 is not defined
 *        -> call the *_sse4_1 functions directly.
 *   3. OPUS_X86_PRESUME_SSE2, and neither OPUS_X86_MAY_HAVE_AVX2 nor
 *      OPUS_X86_MAY_HAVE_SSE4_1 is defined
 *        -> call the *_sse2 functions directly.
 *   4. OPUS_HAVE_RTCD with at least one OPUS_X86_MAY_HAVE_* level defined
 *        -> call through a function-pointer table indexed by `arch`.
 *
 * When no branch matches, nothing is defined here.
 *
 * Every branch also defines OVERRIDE_COMPUTE_LINEAR,
 * OVERRIDE_COMPUTE_ACTIVATION and OVERRIDE_COMPUTE_CONV2D.
 * NOTE(review): presumably the generic code checks these markers to skip its
 * own definitions -- confirm against the including file.
 *
 * In the direct-call branches `arch` is still evaluated once and discarded
 * through (void)(arch), so the argument keeps its side effects and does not
 * become unused at the call site.
 */
#if defined(OPUS_X86_PRESUME_AVX2)

#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
    ((void)(arch),compute_linear_avx2(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
    ((void)(arch),compute_activation_avx2(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
    ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation))

#elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)

#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
    ((void)(arch),compute_linear_sse4_1(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
    ((void)(arch),compute_activation_sse4_1(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
    ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation))

#elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)

#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
    ((void)(arch),compute_linear_sse2(linear, out, in))
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
    ((void)(arch),compute_activation_sse2(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
    ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation))

#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))

/*
 * Run-time dispatch.  Each table has OPUS_ARCHMASK + 1 entries and is only
 * declared here; the definitions are not part of this header.  The macros
 * mask `arch` with OPUS_ARCHMASK before indexing so the index cannot run
 * past the last table slot.
 */
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
                    const LinearLayer *linear,
                    float *out,
                    const float *in
                    );
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
    ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))


extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
                    float *output,
                    const float *input,
                    int N,
                    int activation
                    );
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))


extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
                    const Conv2dLayer *conv,
                    float *out,
                    float *mem,
                    const float *in,
                    int height,
                    int hstride,
                    int activation
                    );
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))


#endif
118*a58d3d2aSXin Li 
119*a58d3d2aSXin Li 
120*a58d3d2aSXin Li 
121*a58d3d2aSXin Li #endif /* DNN_X86_H */
122