/* Copyright (c) 2018 Mozilla
                 2008-2011 Octasic Inc.
                 2012-2017 Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <math.h>
#include "opus_types.h"
#include "arch.h"
#include "nnet.h"
#include "dred_rdovae_constants.h"
#include "plc_data.h"
#include "fargan.h"
#include "os_support.h"
#include "vec.h"

#ifdef ENABLE_OSCE
#include "osce.h"
#endif

#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
#else
#warning Compiling without any vectorization. This code will be very slow
#endif
#endif


#define SOFTMAX_HACK

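/* Dense (fully-connected) layer: applies the layer's linear transform to
   `input`, then the requested activation in place on `output`. */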
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
   compute_linear(layer, output, input, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
}

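/* Worst-case recurrent state size over every model that shares this code,
   used to size the stack buffers in compute_generic_gru() below. */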
#ifdef ENABLE_OSCE
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
#endif

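/* Standard GRU step. With z the update gate, r the reset gate, h the
   candidate state, W_* the input weights and U_* the recurrent weights:

     z = sigmoid(W_z*in + U_z*state)
     r = sigmoid(W_r*in + U_r*state)
     h = tanh(W_h*in + r .* (U_h*state))
     state = z .* state + (1-z) .* h

   input_weights stacks [W_z; W_r; W_h] and recurrent_weights stacks
   [U_z; U_r; U_h], each producing 3*N outputs for a state of size N; note
   the reset gate multiplies the recurrent term after the matrix product
   (the "reset after" formulation). */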
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
  int i;
  int N;
  float zrh[3*MAX_RNN_NEURONS_ALL];
  float recur[3*MAX_RNN_NEURONS_ALL];
  float *z;
  float *r;
  float *h;
  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
  N = recurrent_weights->nb_inputs;
  /* zrh packs the update gate (z), reset gate (r), and candidate state (h). */
  z = zrh;
  r = &zrh[N];
  h = &zrh[2*N];
  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
  celt_assert(in != state);
  compute_linear(input_weights, zrh, in, arch);
  compute_linear(recurrent_weights, recur, state, arch);
  /* Gates: z and r sum the input and recurrent contributions, then a sigmoid. */
  for (i=0;i<2*N;i++)
     zrh[i] += recur[i];
  compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
  /* Candidate state: the reset gate scales the recurrent contribution. */
  for (i=0;i<N;i++)
     h[i] += recur[2*N+i]*r[i];
  compute_activation(h, h, N, ACTIVATION_TANH, arch);
  /* Interpolate between the previous state and the candidate. */
  for (i=0;i<N;i++)
     h[i] = z[i]*state[i] + (1-z[i])*h[i];
  for (i=0;i<N;i++)
     state[i] = h[i];
}
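
/* A minimal calling sketch (hypothetical names, not part of this file): the
   layer structs would come from a generated model description, and the state
   must persist across frames.

     float state[MAX_RNN_NEURONS_ALL] = {0};
     int t;
     for (t = 0; t < n_frames; t++)
        compute_generic_gru(&model->gru_input, &model->gru_recurrent,
                            state, &features[t*feature_dim], arch);
*/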
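/* Gated Linear Unit: output[i] = input[i] * sigmoid((W*input)[i]). The layer
   must be square (nb_inputs == nb_outputs); in-place operation with
   output == input is supported. */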
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
{
   int i;
   float act2[MAX_INPUTS];
   celt_assert(layer->nb_inputs == layer->nb_outputs);
   compute_linear(layer, act2, input, arch);
   compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
   if (input == output) {
     /* Give a vectorization hint to the compiler for the in-place case. */
     for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
   } else {
     for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
   }
}

#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS

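/* Streaming 1-D convolution, evaluated as one dense product per input frame.
   The kernel spans layer->nb_inputs samples; `mem` carries the remaining
   (layer->nb_inputs - input_size) samples of history between calls (callers
   typically zero it before the first frame). */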
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   /* Assemble the receptive field as [history | current frame]. */
   if (layer->nb_inputs!=input_size) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   /* Save the most recent samples as history for the next call. */
   if (layer->nb_inputs!=input_size) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
}

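/* Dilated variant of compute_generic_conv1d(): the ksize kernel taps are
   spaced `dilation` frames apart, so `mem` holds input_size*dilation*(ksize-1)
   samples of history even though only ksize-1 frames of it enter each
   product. */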
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   int ksize = layer->nb_inputs/input_size;
   int i;
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   /* Gather the ksize-1 history frames, spaced `dilation` frames apart. */
   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   /* Shift the history buffer by one frame and append the new input. */
   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
   else {
     OPUS_COPY(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
     OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
   }
}
150