xref: /aosp_15_r20/external/libopus/dnn/nndsp.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2023 Amazon
2    Written by Jan Buethe */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 
33 #include "nndsp.h"
34 #include "arch.h"
35 #include "nnet.h"
36 #include "os_support.h"
37 #include "pitch.h"
38 
39 #include <math.h>
40 
41 #ifndef M_PI
42 #define M_PI 3.141592653589793f
43 #endif
44 
45 #define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
46 
/* Resets an adaptive-convolution state (input history and last kernel) to all zeros. */
void init_adaconv_state(AdaConvState *hAdaConv)
{
    OPUS_CLEAR(hAdaConv, 1);
}
51 
/* Resets an adaptive-comb-filter state (history, last kernel, last lag/gain) to all zeros. */
void init_adacomb_state(AdaCombState *hAdaComb)
{
    OPUS_CLEAR(hAdaComb, 1);
}
56 
/* Resets an adaptive-shaping state (conv layer states) to all zeros. */
void init_adashape_state(AdaShapeState *hAdaShape)
{
    OPUS_CLEAR(hAdaShape, 1);
}
61 
compute_overlap_window(float * window,int overlap_size)62 void compute_overlap_window(float *window, int overlap_size)
63 {
64     int i_sample;
65     for (i_sample=0; i_sample < overlap_size; i_sample++)
66     {
67         window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
68     }
69 }
70 
#ifdef DEBUG_NNDSP
/* Debug helper: dumps each element of vec to stdout as "name[i]: value". */
void print_float_vector(const char *name, const float *vec, int length)
{
    int i;
    for (i = 0; i < length; i++)
    {
        printf("%s[%d]: %f\n", name, i, vec[i]);
    }
}
#endif
80 
scale_kernel(float * kernel,int in_channels,int out_channels,int kernel_size,float * gain)81 static void scale_kernel(
82     float *kernel,
83     int in_channels,
84     int out_channels,
85     int kernel_size,
86     float *gain
87 )
88 /* normalizes (p-norm) kernel over input channel and kernel dimension */
89 {
90     float norm;
91     int i_in_channels, i_out_channels, i_kernel;
92 
93     for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
94     {
95         norm = 0;
96         for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++)
97         {
98             for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
99             {
100                 norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
101             }
102         }
103 #ifdef DEBUG_NNDSP
104         printf("kernel norm: %f, %f\n", norm, sqrt(norm));
105 #endif
106         norm = 1.f / (1e-6f + sqrt(norm));
107         for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
108         {
109             for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
110             {
111 
112                 kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels];
113             }
114         }
115     }
116 }
117 
transform_gains(float * gains,int num_gains,float filter_gain_a,float filter_gain_b)118 static void transform_gains(
119     float *gains,
120     int num_gains,
121     float filter_gain_a,
122     float filter_gain_b
123 )
124 {
125     int i;
126     for (i = 0; i < num_gains; i++)
127     {
128         gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
129     }
130 }
131 
/* Applies an adaptive (feature-conditioned) convolution to one frame.
 *
 * A fresh FIR kernel and per-output-channel gain are predicted from the
 * feature vector on every call. To avoid discontinuities, the first
 * overlap_size output samples cross-fade between the previous frame's kernel
 * (weighted by window) and the new kernel (weighted by 1 - window).
 *
 * hAdaConv      persistent state: per-channel input history and last kernel
 * x_out         output, out_channels * frame_size samples, channel-major
 * x_in          input, in_channels * frame_size samples, channel-major
 * features      conditioning vector fed to kernel_layer / gain_layer
 * kernel_layer  dense layer producing raw kernel coefficients (linear)
 * gain_layer    dense layer producing raw per-channel gains (tanh)
 * left_padding  must equal kernel_size - 1 (causal filtering only)
 * shape_gain    must be 1 (asserted below)
 * window        overlap_size-long cross-fade curve (see compute_overlap_window)
 * arch          architecture id forwarded to the vectorized DSP kernels
 */
void adaconv_process_frame(
    AdaConvState* hAdaConv,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain,
    float *window,
    int arch
)
{
    /* scratch buffers sized for the compile-time maxima from nndsp.h */
    float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
    float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
    float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
    float kernel0[ADACONV_MAX_KERNEL_SIZE];
    float kernel1[ADACONV_MAX_KERNEL_SIZE];
    float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
    float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
    float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
    float *p_input;
    int i_in_channels, i_out_channels, i_sample;

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    celt_assert(shape_gain == 1);
    celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
    celt_assert(kernel_size < frame_size);

    OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
    OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
    OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));

#ifdef DEBUG_NNDSP
    print_float_vector("x_in", x_in, in_channels * frame_size);
#endif

    /* prepare input: per channel, kernel_size samples of saved history
       followed by the current frame, giving the causal left context */
    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
    {
        OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
        OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
    }
    /* p_input points at the first sample of the current frame (channel 0) */
    p_input = input_buffer + kernel_size;


    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
    print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
#endif
    /* map raw gains through exp(a*g + b), then L2-normalize each output
       channel of the kernel and scale it by its gain */
    transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
    scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);

#ifdef DEBUG_NNDSP
    print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
    print_float_vector("adaconv_gain", gain_buffer, out_channels);
#endif

    /* calculate overlapping part using kernel from last frame */

    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
    {
        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
        {
            /* zero-pad both kernels up to the maximum length expected by
               celt_pitch_xcorr, which correlates with a fixed-size kernel */
            OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
            OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);

            /* kernel0: previous frame's kernel; kernel1: newly predicted one */
            OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
            OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
            celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
            celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
            /* cross-fade old -> new kernel over the overlap region */
            for (i_sample = 0; i_sample < overlap_size; i_sample++)
            {
                output_buffer[i_sample + i_out_channels * frame_size] +=  window[i_sample] * channel_buffer0[i_sample];
                output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
            }
            /* remainder of the frame uses only the new kernel */
            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
            {
                output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
            }
        }
    }

    OPUS_COPY(x_out, output_buffer, out_channels * frame_size);

#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, out_channels * frame_size);
#endif

    /* buffer update: keep the last kernel_size input samples of each channel
       as history, and remember the kernel for next frame's cross-fade */
    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
    {
        OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
    }
    OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
}
241 
/* Applies an adaptive comb filter (pitch-lag feedforward comb) to one frame.
 *
 * A short FIR kernel, a comb gain and a global gain are predicted from the
 * feature vector each call. The kernel is correlated with the input delayed
 * by pitch_lag and the result is mixed with the dry input; the first
 * overlap_size samples cross-fade between last frame's kernel/lag/gain and
 * the current ones using the supplied window.
 *
 * hAdaComb          persistent state: input history, last kernel, last
 *                   pitch lag and last global gain
 * x_out / x_in      output / input, frame_size samples each
 * kernel_layer      dense layer producing raw kernel coefficients (linear)
 * gain_layer        dense layer producing the raw comb gain (ReLU)
 * global_gain_layer dense layer producing the raw global gain (tanh)
 * pitch_lag         delay in samples for the comb tap (<= ADACOMB_MAX_LAG)
 * log_gain_limit    upper bound for the log comb gain: gain = exp(limit - raw)
 * window            overlap_size-long cross-fade curve
 * arch              architecture id forwarded to the vectorized DSP kernels
 */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,
    int arch
)
{
    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
    float gain, global_gain;
    float *p_input;
    int i_sample;
    /* Sized with the named constant rather than the magic number 16 the code
       previously used: the OPUS_CLEAR calls below clear
       ADACOMB_MAX_KERNEL_SIZE entries, which would overflow the stack if the
       constant ever grew past 16 while the declarations stayed hard-coded. */
    float kernel[ADACOMB_MAX_KERNEL_SIZE];
    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    celt_assert(kernel_size <= ADACOMB_MAX_KERNEL_SIZE);

    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);

    /* history (kernel_size + max lag samples) precedes the current frame so
       the delayed comb tap can always read valid samples */
    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;

    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain_raw", &gain, 1);
    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
#endif
    /* ReLU output is >= 0, so gain <= exp(log_gain_limit) */
    gain = exp(log_gain_limit - gain);
    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);

#ifdef DEBUG_NNDSP
    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain", &gain, 1);
#endif

    /* zero-pad both kernels to the fixed correlation length */
    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_COPY(kernel, kernel_buffer, kernel_size);
    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);

    /* comb contribution with last frame's kernel and lag (overlap only) */
    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);

    /* comb contribution with the new kernel and lag (whole frame) */
    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
    /* cross-fade old -> new comb output over the overlap region */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
      output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
    }

    /* add the dry input, also with cross-faded global gain */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
      output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
    }

    /* past the overlap, plain (comb + dry) * global_gain */
    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
    {
      output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
    }
    OPUS_COPY(x_out, output_buffer, frame_size);

#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, frame_size);
#endif

    /* buffer update */
    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
    hAdaComb->last_pitch_lag = pitch_lag;
    hAdaComb->last_global_gain = global_gain;
}
336 
337 
/* Applies adaptive temporal shaping: scales each input sample by a
 * network-predicted per-sample gain exp(alpha2(...)) derived from the
 * feature vector and a log-domain temporal envelope of the input itself.
 *
 * hAdaShape    persistent conv1d states for the three layers
 * x_out / x_in output / input, frame_size samples each
 * alpha1f      conv layer over the feature vector (feature branch)
 * alpha1t      conv layer over the temporal envelope (time branch)
 * alpha2       conv layer mapping the merged branches to per-sample log gains
 * avg_pool_k   pooling factor for the envelope; frame_size must be divisible
 *              by it (asserted below)
 */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1f,
    const LinearLayer *alpha1t,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,
    int arch
)
{
    /* in_buffer holds [features | tenv | mean]; tenv aliases into it below */
    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
    float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
    int i, k;
    int tenv_size;
    float mean;
    float *tenv;

    celt_assert(frame_size % avg_pool_k == 0);
    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);

    tenv_size = frame_size / avg_pool_k;
    tenv = in_buffer + feature_dim;
    /* +1: one extra slot after the envelope stores its mean */
    OPUS_CLEAR(tenv, tenv_size + 1);

    OPUS_COPY(in_buffer, features, feature_dim);

    /* calculate temporal envelope: log of mean absolute value per pooling
       window, floored by 2^-16, then mean-removed */
    mean = 0;
    for (i = 0; i < tenv_size; i++)
    {
        for (k = 0; k < avg_pool_k; k++)
        {
            tenv[i] += fabs(x_in[i * avg_pool_k + k]);
        }
        tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
        mean += tenv[i];
    }
    mean /= tenv_size;
    for (i = 0; i < tenv_size; i++)
    {
        tenv[i] -= mean;
    }
    /* the removed mean is appended so the network still sees absolute level */
    tenv[tenv_size] = mean;
#ifdef DEBUG_NNDSP
    print_float_vector("tenv", tenv, tenv_size + 1);
#endif

    /* calculate temporal weights */
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
#endif
    /* two parallel branches: alpha1f reads only the features, alpha1t reads
       only the envelope (+ mean); their outputs are summed below */
    compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
    compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_out", out_buffer, frame_size);
#endif
    /* compute leaky ReLU (slope 0.2) by hand. ToDo: try tanh activation */
    for (i = 0; i < frame_size; i ++)
    {
        float tmp = out_buffer[i] + tmp_buffer[i];
        in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
    }
#ifdef DEBUG_NNDSP
    print_float_vector("post_alpha1", in_buffer, frame_size);
#endif
    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);

    /* shape signal: per-sample gain applied in the linear domain */
    for (i = 0; i < frame_size; i ++)
    {
        x_out[i] = exp(out_buffer[i]) * x_in[i];
    }

}
417