/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32
33 #include "nndsp.h"
34 #include "arch.h"
35 #include "nnet.h"
36 #include "os_support.h"
37 #include "pitch.h"
38
39 #include <math.h>
40
41 #ifndef M_PI
42 #define M_PI 3.141592653589793f
43 #endif
44
45 #define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
46
/* Resets an adaptive-convolution state to all zeros (filter history and the
 * previously predicted kernel). Must be called before the first call to
 * adaconv_process_frame() with this state.
 * Note: OPUS_CLEAR(ptr, n) zero-fills n elements of the pointed-to type,
 * i.e. the whole struct here. */
void init_adaconv_state(AdaConvState *hAdaConv)
{
    OPUS_CLEAR(hAdaConv, 1);
}
51
/* Resets an adaptive-comb-filter state to all zeros (signal history, last
 * kernel, last pitch lag and last global gain). Must be called before the
 * first call to adacomb_process_frame() with this state. */
void init_adacomb_state(AdaCombState *hAdaComb)
{
    OPUS_CLEAR(hAdaComb, 1);
}
56
/* Resets an adaptive-shaping state to all zeros (the conv1d layer states
 * used by adashape_process_frame()). Must be called before first use. */
void init_adashape_state(AdaShapeState *hAdaShape)
{
    OPUS_CLEAR(hAdaShape, 1);
}
61
compute_overlap_window(float * window,int overlap_size)62 void compute_overlap_window(float *window, int overlap_size)
63 {
64 int i_sample;
65 for (i_sample=0; i_sample < overlap_size; i_sample++)
66 {
67 window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
68 }
69 }
70
71 #ifdef DEBUG_NNDSP
/* Debug helper: prints each element of vec as "name[i]: value", one per line. */
void print_float_vector(const char* name, const float *vec, int length)
{
    int idx = 0;
    while (idx < length)
    {
        printf("%s[%d]: %f\n", name, idx, vec[idx]);
        idx++;
    }
}
79 #endif
80
/* Normalizes each output channel's slice of the kernel to unit L2 norm over
 * the input-channel and tap dimensions, then scales it by gain[channel].
 * A small epsilon in the denominator guards all-zero slices. */
static void scale_kernel(
    float *kernel,
    int in_channels,
    int out_channels,
    int kernel_size,
    float *gain
)
{
    float norm;
    float *row;
    int c, j, taps_per_channel;

    /* Each output channel owns one contiguous run of
       in_channels * kernel_size coefficients. */
    taps_per_channel = in_channels * kernel_size;

    for (c = 0; c < out_channels; c++)
    {
        row = kernel + c * taps_per_channel;

        norm = 0;
        for (j = 0; j < taps_per_channel; j++)
        {
            norm += row[j] * row[j];
        }
#ifdef DEBUG_NNDSP
        printf("kernel norm: %f, %f\n", norm, sqrt(norm));
#endif
        norm = 1.f / (1e-6f + sqrt(norm));
        for (j = 0; j < taps_per_channel; j++)
        {
            row[j] *= norm * gain[c];
        }
    }
}
117
transform_gains(float * gains,int num_gains,float filter_gain_a,float filter_gain_b)118 static void transform_gains(
119 float *gains,
120 int num_gains,
121 float filter_gain_a,
122 float filter_gain_b
123 )
124 {
125 int i;
126 for (i = 0; i < num_gains; i++)
127 {
128 gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
129 }
130 }
131
/* Adaptive convolution: filters one frame of x_in with a FIR kernel that is
 * re-predicted every frame from the conditioning features.
 *
 * kernel_layer predicts in_channels*out_channels*kernel_size taps and
 * gain_layer predicts one gain per output channel (tanh, then mapped through
 * exp(a*g + b)); the taps are L2-normalized per output channel and scaled by
 * that gain. Because the kernel changes each frame, the first overlap_size
 * output samples cross-fade between the previous frame's kernel (weighted by
 * window[i]) and the new kernel (weighted by 1 - window[i]).
 *
 * Layout: x_in holds in_channels * frame_size samples and x_out holds
 * out_channels * frame_size samples, both channel-major (channel c starts at
 * offset c * frame_size).
 *
 * Only the causal configuration is supported: shape_gain == 1 and
 * left_padding == kernel_size - 1 (asserted below).
 */
void adaconv_process_frame(
    AdaConvState* hAdaConv,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain,
    float *window,
    int arch
)
{
    float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
    float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
    float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
    float kernel0[ADACONV_MAX_KERNEL_SIZE];
    float kernel1[ADACONV_MAX_KERNEL_SIZE];
    float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
    float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
    float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
    float *p_input;
    int i_in_channels, i_out_channels, i_sample;

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    celt_assert(shape_gain == 1);
    celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
    celt_assert(kernel_size < frame_size);

    OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
    OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
    OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));

#ifdef DEBUG_NNDSP
    print_float_vector("x_in", x_in, in_channels * frame_size);
#endif

    /* prepare input: per channel, prepend the kernel_size history samples
       saved from the previous frame to this frame's new samples */
    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
    {
        OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
        OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
    }
    /* p_input points at the first NEW sample of channel 0; history samples
       sit at negative offsets behind it */
    p_input = input_buffer + kernel_size;


    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
    print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
#endif
    /* map tanh outputs to linear gains, then normalize the kernel to unit
       L2 norm per output channel and apply the gain */
    transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
    scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);

#ifdef DEBUG_NNDSP
    print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
    print_float_vector("adaconv_gain", gain_buffer, out_channels);
#endif

    /* calculate overlapping part using kernel from last frame */

    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
    {
        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
        {
            /* zero-pad both kernels to the compile-time maximum so
               celt_pitch_xcorr can run with a fixed tap count; the padded
               taps are zero and contribute nothing to the sums */
            OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
            OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);

            OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
            OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
            /* old kernel is only needed over the overlap region, the new
               kernel over the full frame */
            celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
            celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
            /* cross-fade: old-kernel output fades out, new-kernel output fades in */
            for (i_sample = 0; i_sample < overlap_size; i_sample++)
            {
                output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
                output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
            }
            /* rest of the frame uses the new kernel only; += accumulates
               over input channels */
            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
            {
                output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
            }
        }
    }

    OPUS_COPY(x_out, output_buffer, out_channels * frame_size);

#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, out_channels * frame_size);
#endif

    /* buffer update: save the last kernel_size input samples per channel and
       the current kernel for the next frame's cross-fade */
    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
    {
        OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
    }
    OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
}
241
/* Adaptive comb filtering: adds a pitch-delayed, FIR-filtered copy of the
 * signal to the signal itself, with the short kernel and two gains predicted
 * per frame from the conditioning features.
 *
 * Past the overlap region the output is
 *     x_out[i] = global_gain * (comb(x)[i] + x[i])
 * where comb() correlates the kernel_size-tap kernel against the input
 * delayed by pitch_lag. The first overlap_size samples cross-fade (via
 * window[]) between the previous frame's kernel/lag/gain and the current
 * ones so kernel updates don't click.
 */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,
    int arch
)
{
    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
    /* input history must cover the deepest look-back: max lag plus kernel */
    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
    float gain, global_gain;
    float *p_input;
    int i_sample;
    /* NOTE(review): these are cleared below with
       OPUS_CLEAR(..., ADACOMB_MAX_KERNEL_SIZE), so this assumes
       ADACOMB_MAX_KERNEL_SIZE <= 16 -- confirm against nndsp.h */
    float kernel[16];
    float last_kernel[16];

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);

    /* prepend saved history so negative offsets (lag + padding) stay valid */
    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;

    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain_raw", &gain, 1);
    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
#endif
    /* gain is ReLU output (>= 0), so exp(log_gain_limit - gain) is capped
       at exp(log_gain_limit); global_gain uses the usual exp(a*g + b) map */
    gain = exp(log_gain_limit - gain);
    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
    /* unit-L2-normalize the kernel, then scale it by the comb gain */
    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);

#ifdef DEBUG_NNDSP
    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain", &gain, 1);
#endif

    /* zero-pad both kernels to the fixed xcorr tap count */
    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_COPY(kernel, kernel_buffer, kernel_size);
    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);

    /* comb response of the previous kernel at the previous lag (overlap only) */
    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);

    /* comb response of the new kernel at the new lag (full frame) */
    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
    /* cross-fade the two comb responses, each weighted by its global gain */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
        output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
    }

    /* add the dry signal, weighted by the cross-faded global gain */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
        output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
    }

    /* past the overlap: out = global_gain * (comb + dry) */
    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
    {
        output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
    }
    OPUS_COPY(x_out, output_buffer, frame_size);

#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, frame_size);
#endif

    /* buffer update: keep kernel, trailing input samples, lag and gain for
       the next frame's cross-fade */
    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
    hAdaComb->last_pitch_lag = pitch_lag;
    hAdaComb->last_global_gain = global_gain;
}
336
337
/* Adaptive temporal shaping: multiplies every sample of x_in by a predicted
 * per-sample gain exp(alpha[i]), where alpha comes from a small two-stage
 * network conditioned on the frame features AND on the signal's own
 * mean-removed log temporal envelope.
 *
 * frame_size must be a multiple of avg_pool_k (asserted); the envelope has
 * one value per avg_pool_k-sample bin.
 */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1f,
    const LinearLayer *alpha1t,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,
    int arch
)
{
    /* in_buffer is reused: first as [features | tenv | mean] network input,
       later (below) to hold the leaky-ReLU activations of stage one */
    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
    float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
    int i, k;
    int tenv_size;
    float mean;
    float *tenv;

    celt_assert(frame_size % avg_pool_k == 0);
    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);

    /* the envelope is written directly behind the features inside in_buffer */
    tenv_size = frame_size / avg_pool_k;
    tenv = in_buffer + feature_dim;
    OPUS_CLEAR(tenv, tenv_size + 1);

    OPUS_COPY(in_buffer, features, feature_dim);

    /* calculate temporal envelope: log of the average magnitude in each
       avg_pool_k-sample bin (the 2^-16 offset avoids log(0)) */
    mean = 0;
    for (i = 0; i < tenv_size; i++)
    {
        for (k = 0; k < avg_pool_k; k++)
        {
            tenv[i] += fabs(x_in[i * avg_pool_k + k]);
        }
        tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
        mean += tenv[i];
    }
    mean /= tenv_size;
    /* remove the mean so the envelope shape is level-independent; the mean
       itself is appended as one extra input value */
    for (i = 0; i < tenv_size; i++)
    {
        tenv[i] -= mean;
    }
    tenv[tenv_size] = mean;
#ifdef DEBUG_NNDSP
    print_float_vector("tenv", tenv, tenv_size + 1);
#endif

    /* calculate temporal weights */
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
#endif
    /* stage one: two parallel conv1d branches (features and envelope),
       summed below before the nonlinearity */
    compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
    compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_out", out_buffer, frame_size);
#endif
    /* compute leaky ReLU by hand. ToDo: try tanh activation */
    for (i = 0; i < frame_size; i ++)
    {
        float tmp = out_buffer[i] + tmp_buffer[i];
        in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
    }
#ifdef DEBUG_NNDSP
    print_float_vector("post_alpha1", in_buffer, frame_size);
#endif
    /* stage two produces the per-sample log-gains */
    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);

    /* shape signal: apply the gains in the linear domain */
    for (i = 0; i < frame_size; i ++)
    {
        x_out[i] = exp(out_buffer[i]) * x_in[i];
    }

}
417