1*a58d3d2aSXin Li /* Copyright (c) 2011-2019 Mozilla 2*a58d3d2aSXin Li 2023 Amazon */ 3*a58d3d2aSXin Li /* 4*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without 5*a58d3d2aSXin Li modification, are permitted provided that the following conditions 6*a58d3d2aSXin Li are met: 7*a58d3d2aSXin Li 8*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright 9*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer. 10*a58d3d2aSXin Li 11*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright 12*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the 13*a58d3d2aSXin Li documentation and/or other materials provided with the distribution. 14*a58d3d2aSXin Li 15*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 19*a58d3d2aSXin Li CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*a58d3d2aSXin Li */ 27*a58d3d2aSXin Li 28*a58d3d2aSXin Li #ifndef DNN_X86_H 29*a58d3d2aSXin Li #define DNN_X86_H 30*a58d3d2aSXin Li 31*a58d3d2aSXin Li #include "cpu_support.h" 32*a58d3d2aSXin Li #include "opus_types.h" 33*a58d3d2aSXin Li 34*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE2) 35*a58d3d2aSXin Li void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in); 36*a58d3d2aSXin Li void compute_activation_sse2(float *output, const float *input, int N, int activation); 37*a58d3d2aSXin Li void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); 38*a58d3d2aSXin Li #endif 39*a58d3d2aSXin Li 40*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_SSE4_1) 41*a58d3d2aSXin Li void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in); 42*a58d3d2aSXin Li void compute_activation_sse4_1(float *output, const float *input, int N, int activation); 43*a58d3d2aSXin Li void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); 44*a58d3d2aSXin Li #endif 45*a58d3d2aSXin Li 46*a58d3d2aSXin Li #if defined(OPUS_X86_MAY_HAVE_AVX2) 47*a58d3d2aSXin Li void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in); 48*a58d3d2aSXin Li void compute_activation_avx2(float *output, const float *input, int N, int activation); 49*a58d3d2aSXin Li void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); 50*a58d3d2aSXin Li #endif 51*a58d3d2aSXin Li 52*a58d3d2aSXin Li 53*a58d3d2aSXin Li #if defined(OPUS_X86_PRESUME_AVX2) 54*a58d3d2aSXin Li 55*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_LINEAR 56*a58d3d2aSXin Li #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in)) 57*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_ACTIVATION 58*a58d3d2aSXin Li #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_avx2(output, input, N, activation)) 59*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_CONV2D 60*a58d3d2aSXin Li #define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation)) 61*a58d3d2aSXin Li 62*a58d3d2aSXin Li #elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2) 63*a58d3d2aSXin Li 64*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_LINEAR 65*a58d3d2aSXin Li #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in)) 66*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_ACTIVATION 67*a58d3d2aSXin Li #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse4_1(output, input, N, activation)) 68*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_CONV2D 69*a58d3d2aSXin Li #define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation)) 70*a58d3d2aSXin Li 71*a58d3d2aSXin Li #elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) 72*a58d3d2aSXin Li 73*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_LINEAR 74*a58d3d2aSXin Li #define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in)) 75*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_ACTIVATION 76*a58d3d2aSXin Li #define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse2(output, input, N, activation)) 77*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_CONV2D 78*a58d3d2aSXin Li #define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation)) 79*a58d3d2aSXin Li 80*a58d3d2aSXin Li #elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) 81*a58d3d2aSXin Li 82*a58d3d2aSXin Li extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])( 83*a58d3d2aSXin Li const LinearLayer *linear, 84*a58d3d2aSXin Li float *out, 85*a58d3d2aSXin Li const float *in 86*a58d3d2aSXin Li ); 87*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_LINEAR 88*a58d3d2aSXin Li #define compute_linear(linear, out, in, arch) \ 89*a58d3d2aSXin Li ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in)) 90*a58d3d2aSXin Li 91*a58d3d2aSXin Li 92*a58d3d2aSXin Li extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])( 93*a58d3d2aSXin Li float *output, 94*a58d3d2aSXin Li const float *input, 95*a58d3d2aSXin Li int N, 96*a58d3d2aSXin Li int activation 97*a58d3d2aSXin Li ); 98*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_ACTIVATION 99*a58d3d2aSXin Li #define compute_activation(output, input, N, activation, arch) \ 100*a58d3d2aSXin Li ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation)) 101*a58d3d2aSXin Li 102*a58d3d2aSXin Li 103*a58d3d2aSXin Li extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])( 104*a58d3d2aSXin Li const Conv2dLayer *conv, 105*a58d3d2aSXin Li float *out, 106*a58d3d2aSXin Li float *mem, 107*a58d3d2aSXin Li const float *in, 108*a58d3d2aSXin Li int height, 109*a58d3d2aSXin Li int hstride, 110*a58d3d2aSXin Li int activation 111*a58d3d2aSXin Li ); 112*a58d3d2aSXin Li #define OVERRIDE_COMPUTE_CONV2D 113*a58d3d2aSXin Li #define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \ 114*a58d3d2aSXin Li ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation)) 115*a58d3d2aSXin Li 116*a58d3d2aSXin Li 117*a58d3d2aSXin Li #endif 118*a58d3d2aSXin Li 119*a58d3d2aSXin Li 120*a58d3d2aSXin Li 121*a58d3d2aSXin Li #endif /* DNN_X86_H */ 122