xref: /aosp_15_r20/external/libopus/dnn/x86/x86_dnn_map.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2018-2019 Mozilla
2*a58d3d2aSXin Li                  2023 Amazon */
3*a58d3d2aSXin Li /*
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
19*a58d3d2aSXin Li    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "x86/x86cpu.h"
33*a58d3d2aSXin Li #include "nnet.h"
34*a58d3d2aSXin Li 
35*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD)
36*a58d3d2aSXin Li 
37*a58d3d2aSXin Li #if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_AVX2))
38*a58d3d2aSXin Li 
39*a58d3d2aSXin Li void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
40*a58d3d2aSXin Li          const LinearLayer *linear,
41*a58d3d2aSXin Li          float *out,
42*a58d3d2aSXin Li          const float *in
43*a58d3d2aSXin Li ) = {
44*a58d3d2aSXin Li   compute_linear_c,                /* non-sse */
45*a58d3d2aSXin Li   compute_linear_c,
46*a58d3d2aSXin Li   MAY_HAVE_SSE2(compute_linear),
47*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(compute_linear), /* sse4.1  */
48*a58d3d2aSXin Li   MAY_HAVE_AVX2(compute_linear)  /* avx  */
49*a58d3d2aSXin Li };
50*a58d3d2aSXin Li 
51*a58d3d2aSXin Li void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
52*a58d3d2aSXin Li          float *output,
53*a58d3d2aSXin Li          const float *input,
54*a58d3d2aSXin Li          int N,
55*a58d3d2aSXin Li          int activation
56*a58d3d2aSXin Li ) = {
57*a58d3d2aSXin Li   compute_activation_c,                /* non-sse */
58*a58d3d2aSXin Li   compute_activation_c,
59*a58d3d2aSXin Li   MAY_HAVE_SSE2(compute_activation),
60*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(compute_activation), /* sse4.1  */
61*a58d3d2aSXin Li   MAY_HAVE_AVX2(compute_activation)  /* avx  */
62*a58d3d2aSXin Li };
63*a58d3d2aSXin Li 
64*a58d3d2aSXin Li void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
65*a58d3d2aSXin Li          const Conv2dLayer *conv,
66*a58d3d2aSXin Li          float *out,
67*a58d3d2aSXin Li          float *mem,
68*a58d3d2aSXin Li          const float *in,
69*a58d3d2aSXin Li          int height,
70*a58d3d2aSXin Li          int hstride,
71*a58d3d2aSXin Li          int activation
72*a58d3d2aSXin Li ) = {
73*a58d3d2aSXin Li   compute_conv2d_c,                /* non-sse */
74*a58d3d2aSXin Li   compute_conv2d_c,
75*a58d3d2aSXin Li   MAY_HAVE_SSE2(compute_conv2d),
76*a58d3d2aSXin Li   MAY_HAVE_SSE4_1(compute_conv2d), /* sse4.1  */
77*a58d3d2aSXin Li   MAY_HAVE_AVX2(compute_conv2d)  /* avx  */
78*a58d3d2aSXin Li };
79*a58d3d2aSXin Li 
80*a58d3d2aSXin Li #endif
81*a58d3d2aSXin Li 
82*a58d3d2aSXin Li 
83*a58d3d2aSXin Li #endif
84