/* Copyright (c) 2018 Mozilla
                 2008-2011 Octasic Inc.
                 2012-2017 Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <math.h>
#include "opus_types.h"
#include "arch.h"
#include "nnet.h"
#include "dred_rdovae_constants.h"
#include "plc_data.h"
#include "fargan.h"
#include "os_support.h"
#include "vec.h"

#ifdef ENABLE_OSCE
#include "osce.h"
#endif

#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
#else
#warning Compiling without any vectorization. This code will be very slow
#endif
#endif


#define SOFTMAX_HACK

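/* Fully-connected (dense) layer: one matrix product followed by a pointwise
   nonlinearity, conceptually output = activation(W*input + b), with W and b
   stored in `layer` and `activation` selecting an ACTIVATION_* constant from
   nnet.h. A hypothetical call (buffer and layer names invented purely for
   illustration) might look like:

      float out[MY_DENSE_OUTPUTS];
      compute_generic_dense(&my_dense, out, features, ACTIVATION_TANH, arch);
*/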
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
   compute_linear(layer, output, input, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
}

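/* Worst-case GRU width over every subsystem that can run through this file;
   it sizes the stack buffers in compute_generic_gru() below. */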
#ifdef ENABLE_OSCE
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
#endif

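/* One step of a gated recurrent unit (GRU). With x the input, h the previous
   state, sigma() the sigmoid and (.) elementwise multiplication, the code
   below computes:

      z  = sigma(W_z x + U_z h)            (update gate)
      r  = sigma(W_r x + U_r h)            (reset gate)
      h~ = tanh(W_h x + r (.) (U_h h))     (candidate state)
      h' = z (.) h + (1 - z) (.) h~

   input_weights packs [W_z; W_r; W_h] and recurrent_weights packs
   [U_z; U_r; U_h], so one compute_linear() call per matrix produces all
   three gates at once (in zrh and recur). Note the convention used here:
   z gates how much of the *old* state is kept. */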
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
  int i;
  int N;
  float zrh[3*MAX_RNN_NEURONS_ALL];
  float recur[3*MAX_RNN_NEURONS_ALL];
  float *z;
  float *r;
  float *h;
  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
  N = recurrent_weights->nb_inputs;
  z = zrh;
  r = &zrh[N];
  h = &zrh[2*N];
  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
  celt_assert(in != state);
  compute_linear(input_weights, zrh, in, arch);
  compute_linear(recurrent_weights, recur, state, arch);
  for (i=0;i<2*N;i++)
     zrh[i] += recur[i];
  compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
  for (i=0;i<N;i++)
     h[i] += recur[2*N+i]*r[i];
  compute_activation(h, h, N, ACTIVATION_TANH, arch);
  for (i=0;i<N;i++)
     h[i] = z[i]*state[i] + (1-z[i])*h[i];
  for (i=0;i<N;i++)
     state[i] = h[i];
}

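/* Gated linear unit: output = input (.) sigma(W*input + b), i.e. the layer's
   own sigmoid-activated projection gates the input elementwise, which is why
   the layer must be square (nb_inputs == nb_outputs). */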
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
{
   int i;
   float act2[MAX_INPUTS];
   celt_assert(layer->nb_inputs == layer->nb_outputs);
   compute_linear(layer, act2, input, arch);
   compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
   if (input == output) {
     /* Give a vectorization hint to the compiler for the in-place case. */
     for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
   } else {
     for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
   }
}

#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS

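/* 1D convolution with caller-held history: `mem` holds the previous
   (nb_inputs - input_size) samples, so the kernel's receptive field is the
   concatenation [mem | input] and the convolution reduces to a single dense
   product over that buffer. Afterwards `mem` is shifted forward by one
   frame. When nb_inputs == input_size the kernel spans a single frame and
   no history is kept. */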
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   if (layer->nb_inputs!=input_size) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   if (layer->nb_inputs!=input_size) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
}

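/* Dilated variant of the convolution above: the kernel sees ksize frames
   spaced `dilation` frames apart, so `mem` keeps the last
   input_size*dilation*(ksize-1) samples. For dilation > 1 the strided copy
   below gathers the (ksize-1) historical frames that align with the current
   step, the same dense product is applied, and `mem` is then shifted forward
   by one frame. */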
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   int ksize = layer->nb_inputs/input_size;
   int i;
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
   else {
     OPUS_COPY(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
     OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
   }
}