xref: /aosp_15_r20/external/libopus/dnn/osce.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2023 Amazon
2    Written by Jan Buethe */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 
33 #include <math.h>
34 #include "osce.h"
35 #include "osce_features.h"
36 #include "os_support.h"
37 #include "nndsp.h"
38 #include "float_cast.h"
39 #include "arch.h"
40 
41 #ifdef OSCE_DEBUG
42 #include <stdio.h>
43 /*#define WRITE_FEATURES*/
44 /*#define DEBUG_LACE*/
45 /*#define DEBUG_NOLACE*/
/* Lazily open a debug dump file: fopen(name, mode) is only called while fid is still NULL. */
#define FINIT(fid, name, mode) do { if ((fid) == NULL) { (fid) = fopen((name), (mode)); } } while (0)
47 #endif
48 
49 #ifdef ENABLE_OSCE_TRAINING_DATA
50 #include <stdio.h>
51 #endif
52 
/* Clamp a to the closed interval [min, max].
   The earlier expansion compared the lower-clamped value against max but then
   yielded the raw (a) in the "not above max" branch, so values below min were
   returned unchanged. Each argument may be evaluated more than once; do not
   pass expressions with side effects. */
#define CLIP(a, min, max) ((a) < (min) ? (min) : ((a) > (max) ? (max) : (a)))
54 
55 extern const WeightArray lacelayers_arrays[];
56 extern const WeightArray nolacelayers_arrays[];
57 
58 /* LACE */
59 
60 #ifndef DISABLE_LACE
61 
compute_lace_numbits_embedding(float * emb,float numbits,int dim,float min_val,float max_val,int logscale)62 static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
63 {
64     float x;
65     (void) dim;
66 
67     numbits = logscale ? log(numbits) : numbits;
68     x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
69 
70     emb[0] = sin(x * LACE_NUMBITS_SCALE_0 - 0.5f);
71     emb[1] = sin(x * LACE_NUMBITS_SCALE_1 - 0.5f);
72     emb[2] = sin(x * LACE_NUMBITS_SCALE_2 - 0.5f);
73     emb[3] = sin(x * LACE_NUMBITS_SCALE_3 - 0.5f);
74     emb[4] = sin(x * LACE_NUMBITS_SCALE_4 - 0.5f);
75     emb[5] = sin(x * LACE_NUMBITS_SCALE_5 - 0.5f);
76     emb[6] = sin(x * LACE_NUMBITS_SCALE_6 - 0.5f);
77     emb[7] = sin(x * LACE_NUMBITS_SCALE_7 - 0.5f);
78 }
79 
80 
/* Initialise a LACE model instance.
   Zeroes *hLACE, loads the layers from weights via init_lacelayers() and
   fills the overlap window. Returns whatever init_lacelayers() returned. */
static int init_lace(LACE *hLACE, const WeightArray *weights)
{
    int status;

    OPUS_CLEAR(hLACE, 1);
    celt_assert(weights != NULL);

    status = init_lacelayers(&hLACE->layers, weights);
    compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE);

    return status;
}
93 
/* Put a LACE processing state back into its initial condition:
   the whole struct is zeroed, then each filter sub-state is re-initialised. */
static void reset_lace_state(LACEState *state)
{
    OPUS_CLEAR(state, 1);

    /* the two comb-filter stages */
    init_adacomb_state(&state->cf1_state);
    init_adacomb_state(&state->cf2_state);

    /* the final adaptive convolution stage */
    init_adaconv_state(&state->af1_state);
}
102 
/* LACE feature encoder.
   Reads 4 * LACE_NUM_FEATURES values from features, numbits[0..1] and
   periods[0..3], and writes 4 * LACE_COND_DIM conditioning values to output
   (one LACE_COND_DIM vector per subframe). The conv2 and GRU states kept in
   *state are updated. */
static void lace_feature_net(
    LACE *hLACE,
    LACEState *state,
    float *output,
    const float *features,
    const float *numbits,
    const int *periods,
    int arch
)
{
    float in_buf[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
    float out_buf[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
    float bits_emb[2 * LACE_NUMBITS_EMBEDDING_DIM];
    int sf, k;

    /* embed both numbits values on a log scale, one after the other */
    for (k = 0; k < 2; k++)
    {
        compute_lace_numbits_embedding(bits_emb + k * LACE_NUMBITS_EMBEDDING_DIM, numbits[k], LACE_NUMBITS_EMBEDDING_DIM,
            log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
    }

    /* scaling and dimensionality reduction, one subframe at a time */
    for (sf = 0; sf < 4; sf++)
    {
        float *dst = in_buf;

        /* input vector layout: [features | pitch embedding row | numbits embedding] */
        OPUS_COPY(dst, features + sf * LACE_NUM_FEATURES, LACE_NUM_FEATURES);
        dst += LACE_NUM_FEATURES;
        OPUS_COPY(dst, hLACE->layers.lace_pitch_embedding.float_weights + periods[sf] * LACE_PITCH_EMBEDDING_DIM, LACE_PITCH_EMBEDDING_DIM);
        dst += LACE_PITCH_EMBEDDING_DIM;
        OPUS_COPY(dst, bits_emb, 2 * LACE_NUMBITS_EMBEDDING_DIM);

        /* no state buffer is passed for this layer */
        compute_generic_conv1d(
            &hLACE->layers.lace_fnet_conv1,
            out_buf + sf * LACE_HIDDEN_FEATURE_DIM,
            NULL,
            in_buf,
            LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* subframe accumulation: all four hidden vectors go through conv2 at once */
    OPUS_COPY(in_buf, out_buf, 4 * LACE_HIDDEN_FEATURE_DIM);
    compute_generic_conv1d(
        &hLACE->layers.lace_fnet_conv2,
        out_buf,
        state->feature_net_conv2_state,
        in_buf,
        4 * LACE_HIDDEN_FEATURE_DIM,
        ACTIVATION_TANH,
        arch
    );

    /* tconv upsampling */
    OPUS_COPY(in_buf, out_buf, 4 * LACE_COND_DIM);
    compute_generic_dense(
        &hLACE->layers.lace_fnet_tconv,
        out_buf,
        in_buf,
        ACTIVATION_TANH,
        arch
    );

    /* GRU: step once per subframe and emit the updated hidden state */
    OPUS_COPY(in_buf, out_buf, 4 * LACE_COND_DIM);
    for (sf = 0; sf < 4; sf++)
    {
        compute_generic_gru(
            &hLACE->layers.lace_fnet_gru_input,
            &hLACE->layers.lace_fnet_gru_recurrent,
            state->feature_net_gru_state,
            in_buf + sf * LACE_COND_DIM,
            arch
        );
        OPUS_COPY(output + sf * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM);
    }
}
176 
177 
/* Run LACE on one frame of 4 * LACE_FRAME_SIZE samples (four subframes).
   Pipeline: pre-emphasis -> feature encoder -> comb filter 1 -> comb filter 2
   -> adaptive convolution -> de-emphasis. Reads x_in, features, numbits[0..1]
   and periods[0..3]; writes 4 * LACE_FRAME_SIZE samples to x_out and updates
   the filter memories in *state. With DEBUG_LACE defined, intermediate signals
   are additionally dumped to files under debug/. */
static void lace_process_20ms_frame(
    LACE* hLACE,
    LACEState *state,
    float *x_out,
    const float *x_in,
    const float *features,
    const float *numbits,
    const int *periods,
    int arch
)
{
    float cond[4 * LACE_COND_DIM];
    float signal[4 * LACE_FRAME_SIZE];
    int sf, k;

#ifdef DEBUG_LACE
    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods;


    FINIT(f_features, "debug/c_features.f32", "wb");
    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
    FINIT(f_xin, "debug/c_x_in.f32", "wb");
    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
    FINIT(f_numbits, "debug/c_numbits.f32", "wb");
    FINIT(f_periods, "debug/c_periods.s32", "wb");

    fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin);
    fwrite(numbits, sizeof(*numbits), 2, f_numbits);
    fwrite(periods, sizeof(*periods), 4, f_periods);
#endif

    /* pre-emphasis; the last input sample is carried over in state->preemph_mem */
    for (k = 0; k < 4 * LACE_FRAME_SIZE; k++)
    {
        signal[k] = x_in[k] - LACE_PREEMPH * state->preemph_mem;
        state->preemph_mem = x_in[k];
    }

    /* run feature encoder */
    lace_feature_net(hLACE, state, cond, features, numbits, periods, arch);
#ifdef DEBUG_LACE
    fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features);
    fwrite(cond, sizeof(*cond), 4 * LACE_COND_DIM, f_encfeatures);
    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph);
#endif

    /* 1st comb filtering stage (in place on each subframe) */
    for (sf = 0; sf < 4; sf++)
    {
        float *frame = signal + sf * LACE_FRAME_SIZE;

        adacomb_process_frame(
            &state->cf1_state,
            frame,
            frame,
            cond + sf * LACE_COND_DIM,
            &hLACE->layers.lace_cf1_kernel,
            &hLACE->layers.lace_cf1_gain,
            &hLACE->layers.lace_cf1_global_gain,
            periods[sf],
            LACE_COND_DIM,
            LACE_FRAME_SIZE,
            LACE_OVERLAP_SIZE,
            LACE_CF1_KERNEL_SIZE,
            LACE_CF1_LEFT_PADDING,
            LACE_CF1_FILTER_GAIN_A,
            LACE_CF1_FILTER_GAIN_B,
            LACE_CF1_LOG_GAIN_LIMIT,
            hLACE->window,
            arch);
    }

#ifdef DEBUG_LACE
    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1);
#endif

    /* 2nd comb filtering stage (in place on each subframe) */
    for (sf = 0; sf < 4; sf++)
    {
        float *frame = signal + sf * LACE_FRAME_SIZE;

        adacomb_process_frame(
            &state->cf2_state,
            frame,
            frame,
            cond + sf * LACE_COND_DIM,
            &hLACE->layers.lace_cf2_kernel,
            &hLACE->layers.lace_cf2_gain,
            &hLACE->layers.lace_cf2_global_gain,
            periods[sf],
            LACE_COND_DIM,
            LACE_FRAME_SIZE,
            LACE_OVERLAP_SIZE,
            LACE_CF2_KERNEL_SIZE,
            LACE_CF2_LEFT_PADDING,
            LACE_CF2_FILTER_GAIN_A,
            LACE_CF2_FILTER_GAIN_B,
            LACE_CF2_LOG_GAIN_LIMIT,
            hLACE->window,
            arch);
    }
#ifdef DEBUG_LACE
    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2);
#endif

    /* final adaptive filtering stage (in place on each subframe) */
    for (sf = 0; sf < 4; sf++)
    {
        float *frame = signal + sf * LACE_FRAME_SIZE;

        adaconv_process_frame(
            &state->af1_state,
            frame,
            frame,
            cond + sf * LACE_COND_DIM,
            &hLACE->layers.lace_af1_kernel,
            &hLACE->layers.lace_af1_gain,
            LACE_COND_DIM,
            LACE_FRAME_SIZE,
            LACE_OVERLAP_SIZE,
            LACE_AF1_IN_CHANNELS,
            LACE_AF1_OUT_CHANNELS,
            LACE_AF1_KERNEL_SIZE,
            LACE_AF1_LEFT_PADDING,
            LACE_AF1_FILTER_GAIN_A,
            LACE_AF1_FILTER_GAIN_B,
            LACE_AF1_SHAPE_GAIN,
            hLACE->window,
            arch);
    }
#ifdef DEBUG_LACE
    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1);
#endif

    /* de-emphasis; the last output sample is carried over in state->deemph_mem */
    for (k = 0; k < 4 * LACE_FRAME_SIZE; k++)
    {
        x_out[k] = signal[k] + LACE_PREEMPH * state->deemph_mem;
        state->deemph_mem = x_out[k];
    }
#ifdef DEBUG_LACE
    fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph);
#endif
}
321 
322 #endif /* #ifndef DISABLE_LACE */
323 
324 
325 /* NoLACE */
326 #ifndef DISABLE_NOLACE
327 
compute_nolace_numbits_embedding(float * emb,float numbits,int dim,float min_val,float max_val,int logscale)328 static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
329 {
330     float x;
331     (void) dim;
332 
333     numbits = logscale ? log(numbits) : numbits;
334     x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
335 
336     emb[0] = sin(x * NOLACE_NUMBITS_SCALE_0 - 0.5f);
337     emb[1] = sin(x * NOLACE_NUMBITS_SCALE_1 - 0.5f);
338     emb[2] = sin(x * NOLACE_NUMBITS_SCALE_2 - 0.5f);
339     emb[3] = sin(x * NOLACE_NUMBITS_SCALE_3 - 0.5f);
340     emb[4] = sin(x * NOLACE_NUMBITS_SCALE_4 - 0.5f);
341     emb[5] = sin(x * NOLACE_NUMBITS_SCALE_5 - 0.5f);
342     emb[6] = sin(x * NOLACE_NUMBITS_SCALE_6 - 0.5f);
343     emb[7] = sin(x * NOLACE_NUMBITS_SCALE_7 - 0.5f);
344 }
345 
/* Initialise a NoLACE model instance.
   Zeroes *hNoLACE, loads the layers from weights via init_nolacelayers() and
   fills the overlap window. Returns whatever init_nolacelayers() returned. */
static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights)
{
    int status;

    OPUS_CLEAR(hNoLACE, 1);
    celt_assert(weights != NULL);

    status = init_nolacelayers(&hNoLACE->layers, weights);
    compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE);

    return status;
}
358 
/* Put a NoLACE processing state back into its initial condition:
   the whole struct is zeroed, then each filter/shaper sub-state is re-initialised. */
static void reset_nolace_state(NoLACEState *state)
{
    OPUS_CLEAR(state, 1);

    /* comb-filter stages */
    init_adacomb_state(&state->cf1_state);
    init_adacomb_state(&state->cf2_state);

    /* adaptive convolution stages */
    init_adaconv_state(&state->af1_state);
    init_adaconv_state(&state->af2_state);
    init_adaconv_state(&state->af3_state);
    init_adaconv_state(&state->af4_state);

    /* temporal shaping stages */
    init_adashape_state(&state->tdshape1_state);
    init_adashape_state(&state->tdshape2_state);
    init_adashape_state(&state->tdshape3_state);
}
373 
/* NoLACE feature encoder.
   Reads 4 * NOLACE_NUM_FEATURES values from features, numbits[0..1] and
   periods[0..3], and writes 4 * NOLACE_COND_DIM conditioning values to output
   (one NOLACE_COND_DIM vector per subframe). The conv2 and GRU states kept in
   *state are updated. */
static void nolace_feature_net(
    NoLACE *hNoLACE,
    NoLACEState *state,
    float *output,
    const float *features,
    const float *numbits,
    const int *periods,
    int arch
)
{
    float in_buf[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
    float out_buf[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
    float bits_emb[2 * NOLACE_NUMBITS_EMBEDDING_DIM];
    int sf, k;

    /* embed both numbits values on a log scale, one after the other */
    for (k = 0; k < 2; k++)
    {
        compute_nolace_numbits_embedding(bits_emb + k * NOLACE_NUMBITS_EMBEDDING_DIM, numbits[k], NOLACE_NUMBITS_EMBEDDING_DIM,
            log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1);
    }

    /* scaling and dimensionality reduction, one subframe at a time */
    for (sf = 0; sf < 4; sf++)
    {
        float *dst = in_buf;

        /* input vector layout: [features | pitch embedding row | numbits embedding] */
        OPUS_COPY(dst, features + sf * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES);
        dst += NOLACE_NUM_FEATURES;
        OPUS_COPY(dst, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[sf] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM);
        dst += NOLACE_PITCH_EMBEDDING_DIM;
        OPUS_COPY(dst, bits_emb, 2 * NOLACE_NUMBITS_EMBEDDING_DIM);

        /* no state buffer is passed for this layer */
        compute_generic_conv1d(
            &hNoLACE->layers.nolace_fnet_conv1,
            out_buf + sf * NOLACE_HIDDEN_FEATURE_DIM,
            NULL,
            in_buf,
            NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * NOLACE_NUMBITS_EMBEDDING_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* subframe accumulation: all four hidden vectors go through conv2 at once */
    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_HIDDEN_FEATURE_DIM);
    compute_generic_conv1d(
        &hNoLACE->layers.nolace_fnet_conv2,
        out_buf,
        state->feature_net_conv2_state,
        in_buf,
        4 * NOLACE_HIDDEN_FEATURE_DIM,
        ACTIVATION_TANH,
        arch
    );

    /* tconv upsampling */
    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_COND_DIM);
    compute_generic_dense(
        &hNoLACE->layers.nolace_fnet_tconv,
        out_buf,
        in_buf,
        ACTIVATION_TANH,
        arch
    );

    /* GRU: step once per subframe and emit the updated hidden state */
    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_COND_DIM);
    for (sf = 0; sf < 4; sf++)
    {
        compute_generic_gru(
            &hNoLACE->layers.nolace_fnet_gru_input,
            &hNoLACE->layers.nolace_fnet_gru_recurrent,
            state->feature_net_gru_state,
            in_buf + sf * NOLACE_COND_DIM,
            arch
        );
        OPUS_COPY(output + sf * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM);
    }
}
447 
448 
/* Run NoLACE on one frame of 4 * NOLACE_FRAME_SIZE samples (four subframes).

   Pipeline: pre-emphasis -> feature encoder -> comb filter 1 -> comb filter 2
   -> adaptive conv 1 -> three shape-mix rounds (adashape on the second channel
   followed by adaptive conv 2/3/4) -> de-emphasis. After every stage except
   the last, the conditioning features are passed through a per-stage conv1d
   (nolace_post_*) and the result replaces feature_buffer for the next stage.

   Reads x_in, features, numbits[0..1] and periods[0..3]; writes
   4 * NOLACE_FRAME_SIZE samples to x_out and updates the memories in *state.

   x_buffer1 / x_buffer2 are used alternately as input and output of the
   adaptive convolution stages; they are sized for four subframes of two
   channels each (the intermediate stages are asserted to output 2 channels).

   With DEBUG_NOLACE defined, intermediate signals are dumped under debug/.
   That path previously referenced an undeclared output_buffer and an
   undeclared f_postaf2 and therefore did not compile; both are fixed here. */
static void nolace_process_20ms_frame(
    NoLACE* hNoLACE,
    NoLACEState *state,
    float *x_out,
    const float *x_in,
    const float *features,
    const float *numbits,
    const int *periods,
    int arch
)
{
    float feature_buffer[4 * NOLACE_COND_DIM];
    float feature_transform_buffer[4 * NOLACE_COND_DIM];
    float x_buffer1[8 * NOLACE_FRAME_SIZE];
    float x_buffer2[8 * NOLACE_FRAME_SIZE];
    int i_subframe, i_sample;
    NOLACELayers *layers = &hNoLACE->layers;

#ifdef DEBUG_NOLACE
    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_postaf2=NULL;
    static FILE *f_xdeemph=NULL, *f_numbits=NULL, *f_periods=NULL;


    FINIT(f_features, "debug/c_features.f32", "wb");
    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
    FINIT(f_xin, "debug/c_x_in.f32", "wb");
    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
    FINIT(f_postaf2, "debug/c_post_af2.f32", "wb");
    FINIT(f_numbits, "debug/c_numbits.f32", "wb");
    FINIT(f_periods, "debug/c_periods.s32", "wb");

    fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin);
    fwrite(numbits, sizeof(*numbits), 2, f_numbits);
    fwrite(periods, sizeof(*periods), 4, f_periods);
#endif

    /* pre-emphasis; the last input sample is carried over in state->preemph_mem */
    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
    {
        x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem;
        state->preemph_mem = x_in[i_sample];
    }

    /* run feature encoder */
    nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch);
#ifdef DEBUG_NOLACE
    fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features);
    fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures);
    /* the pre-emphasised signal lives in x_buffer1 in this function */
    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph);
#endif

    /* 1st comb filtering stage */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        /* modifies signal in place */
        adacomb_process_frame(
            &state->cf1_state,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_cf1_kernel,
            &layers->nolace_cf1_gain,
            &layers->nolace_cf1_global_gain,
            periods[i_subframe],
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_CF1_KERNEL_SIZE,
            NOLACE_CF1_LEFT_PADDING,
            NOLACE_CF1_FILTER_GAIN_A,
            NOLACE_CF1_FILTER_GAIN_B,
            NOLACE_CF1_LOG_GAIN_LIMIT,
            hNoLACE->window,
            arch);

        /* transform the conditioning features for the next stage */
        compute_generic_conv1d(
            &layers->nolace_post_cf1,
            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
            state->post_cf1_state,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            NOLACE_COND_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* update feature buffer */
    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

#ifdef DEBUG_NOLACE
    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1);
#endif

    /* 2nd comb filtering stage */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        /* modifies signal in place */
        adacomb_process_frame(
            &state->cf2_state,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_cf2_kernel,
            &layers->nolace_cf2_gain,
            &layers->nolace_cf2_global_gain,
            periods[i_subframe],
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_CF2_KERNEL_SIZE,
            NOLACE_CF2_LEFT_PADDING,
            NOLACE_CF2_FILTER_GAIN_A,
            NOLACE_CF2_FILTER_GAIN_B,
            NOLACE_CF2_LOG_GAIN_LIMIT,
            hNoLACE->window,
            arch);

        /* transform the conditioning features for the next stage */
        compute_generic_conv1d(
            &layers->nolace_post_cf2,
            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
            state->post_cf2_state,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            NOLACE_COND_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* update feature buffer */
    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

#ifdef DEBUG_NOLACE
    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2);
#endif

    /* first adaptive filtering stage: x_buffer1 -> x_buffer2 */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        adaconv_process_frame(
            &state->af1_state,
            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_af1_kernel,
            &layers->nolace_af1_gain,
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_AF1_IN_CHANNELS,
            NOLACE_AF1_OUT_CHANNELS,
            NOLACE_AF1_KERNEL_SIZE,
            NOLACE_AF1_LEFT_PADDING,
            NOLACE_AF1_FILTER_GAIN_A,
            NOLACE_AF1_FILTER_GAIN_B,
            NOLACE_AF1_SHAPE_GAIN,
            hNoLACE->window,
            arch);

        /* transform the conditioning features for the next stage */
        compute_generic_conv1d(
            &layers->nolace_post_af1,
            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
            state->post_af1_state,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            NOLACE_COND_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* update feature buffer */
    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

#ifdef DEBUG_NOLACE
    fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1);
#endif

    /* first shape-mix round: x_buffer2 -> x_buffer1 */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        celt_assert(NOLACE_AF1_OUT_CHANNELS == 2);
        /* modifies second channel in place */
        adashape_process_frame(
            &state->tdshape1_state,
            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_tdshape1_alpha1_f,
            &layers->nolace_tdshape1_alpha1_t,
            &layers->nolace_tdshape1_alpha2,
            NOLACE_TDSHAPE1_FEATURE_DIM,
            NOLACE_TDSHAPE1_FRAME_SIZE,
            NOLACE_TDSHAPE1_AVG_POOL_K,
            arch
        );

        adaconv_process_frame(
            &state->af2_state,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS,
            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_af2_kernel,
            &layers->nolace_af2_gain,
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_AF2_IN_CHANNELS,
            NOLACE_AF2_OUT_CHANNELS,
            NOLACE_AF2_KERNEL_SIZE,
            NOLACE_AF2_LEFT_PADDING,
            NOLACE_AF2_FILTER_GAIN_A,
            NOLACE_AF2_FILTER_GAIN_B,
            NOLACE_AF2_SHAPE_GAIN,
            hNoLACE->window,
            arch);

        /* transform the conditioning features for the next stage */
        compute_generic_conv1d(
            &layers->nolace_post_af2,
            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
            state->post_af2_state,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            NOLACE_COND_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* update feature buffer */
    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

#ifdef DEBUG_NOLACE
    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2);
#endif

    /* second shape-mix round: x_buffer1 -> x_buffer2 */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        celt_assert(NOLACE_AF2_OUT_CHANNELS == 2);
        /* modifies second channel in place */
        adashape_process_frame(
            &state->tdshape2_state,
            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_tdshape2_alpha1_f,
            &layers->nolace_tdshape2_alpha1_t,
            &layers->nolace_tdshape2_alpha2,
            NOLACE_TDSHAPE2_FEATURE_DIM,
            NOLACE_TDSHAPE2_FRAME_SIZE,
            NOLACE_TDSHAPE2_AVG_POOL_K,
            arch
        );

        adaconv_process_frame(
            &state->af3_state,
            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_af3_kernel,
            &layers->nolace_af3_gain,
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_AF3_IN_CHANNELS,
            NOLACE_AF3_OUT_CHANNELS,
            NOLACE_AF3_KERNEL_SIZE,
            NOLACE_AF3_LEFT_PADDING,
            NOLACE_AF3_FILTER_GAIN_A,
            NOLACE_AF3_FILTER_GAIN_B,
            NOLACE_AF3_SHAPE_GAIN,
            hNoLACE->window,
            arch);

        /* transform the conditioning features for the next stage */
        compute_generic_conv1d(
            &layers->nolace_post_af3,
            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
            state->post_af3_state,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            NOLACE_COND_DIM,
            ACTIVATION_TANH,
            arch);
    }

    /* update feature buffer */
    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

    /* third shape-mix round: x_buffer2 -> x_buffer1 (no feature transform afterwards) */
    for (i_subframe = 0; i_subframe < 4; i_subframe++)
    {
        celt_assert(NOLACE_AF3_OUT_CHANNELS == 2);
        /* modifies second channel in place */
        adashape_process_frame(
            &state->tdshape3_state,
            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_tdshape3_alpha1_f,
            &layers->nolace_tdshape3_alpha1_t,
            &layers->nolace_tdshape3_alpha2,
            NOLACE_TDSHAPE3_FEATURE_DIM,
            NOLACE_TDSHAPE3_FRAME_SIZE,
            NOLACE_TDSHAPE3_AVG_POOL_K,
            arch
        );

        adaconv_process_frame(
            &state->af4_state,
            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS,
            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS,
            feature_buffer + i_subframe * NOLACE_COND_DIM,
            &layers->nolace_af4_kernel,
            &layers->nolace_af4_gain,
            NOLACE_COND_DIM,
            NOLACE_FRAME_SIZE,
            NOLACE_OVERLAP_SIZE,
            NOLACE_AF4_IN_CHANNELS,
            NOLACE_AF4_OUT_CHANNELS,
            NOLACE_AF4_KERNEL_SIZE,
            NOLACE_AF4_LEFT_PADDING,
            NOLACE_AF4_FILTER_GAIN_A,
            NOLACE_AF4_FILTER_GAIN_B,
            NOLACE_AF4_SHAPE_GAIN,
            hNoLACE->window,
            arch);

    }


    /* de-emphasis; the last output sample is carried over in state->deemph_mem */
    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
    {
        x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem;
        state->deemph_mem = x_out[i_sample];
    }
#ifdef DEBUG_NOLACE
    fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph);
#endif
}
786 
787 #endif /* #ifndef DISABLE_NOLACE */
788 
789 /* API */
790 
/* Reset the OSCE instance for the given enhancement method.
   Clears the feature state, resets the state of the selected method (nothing
   to reset for OSCE_METHOD_NONE), asserts on an unknown method, then records
   the method and sets features.reset to 2. */
void osce_reset(silk_OSCE_struct *hOSCE, int method)
{
    OPUS_CLEAR(&hOSCE->features, 1);

    if (method == OSCE_METHOD_NONE)
    {
        /* no per-method state to reset */
    }
#ifndef DISABLE_LACE
    else if (method == OSCE_METHOD_LACE)
    {
        reset_lace_state(&hOSCE->state.lace);
    }
#endif
#ifndef DISABLE_NOLACE
    else if (method == OSCE_METHOD_NOLACE)
    {
        reset_nolace_state(&hOSCE->state.nolace);
    }
#endif
    else
    {
        celt_assert(0 && "method not defined"); /* Question: return error code? */
    }

    hOSCE->method = method;
    hOSCE->features.reset = 2;
}
817 
818 
819 #if 0
820 #include <stdio.h>
/* Debug helper: writes one "<name>[<index>]: <value>" line to fid for each
 * of the first n floats in array. */
static void print_float_array(FILE *fid, const char *name, const float *array, int n)
{
    int idx = 0;

    while (idx < n)
    {
        fprintf(fid, "%s[%d]: %f\n", name, idx, array[idx]);
        idx++;
    }
}
829 
/* Debug helper: writes one "<name>[<index>]: <value>" line to fid for each
 * of the first n ints in array. */
static void print_int_array(FILE *fid, const char *name, const int *array, int n)
{
    int idx = 0;

    while (idx < n)
    {
        fprintf(fid, "%s[%d]: %d\n", name, idx, array[idx]);
        idx++;
    }
}
838 
839 static void print_int8_array(FILE *fid, const char  *name, const opus_int8 *array, int n)
840 {
841     int i;
842     for (i = 0; i < n; i++)
843     {
844         fprintf(fid, "%s[%d]: %d\n", name, i, array[i]);
845     }
846 }
847 
848 static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer)
849 {
850     int i, n_in, n_out, n_total;
851     char tmp[256];
852 
853     n_in = layer->nb_inputs;
854     n_out = layer->nb_outputs;
855     n_total = n_in * n_out;
856 
857     fprintf(fid, "\nprinting layer %s...\n", name);
858     fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, n_in, name, n_out);
859 
860     if (layer->bias !=NULL){}
861     if (layer->subias !=NULL){}
862     if (layer->weights !=NULL){}
863     if (layer->float_weights !=NULL){}
864 
865     if (layer->bias != NULL) {sprintf(tmp, "%s.bias", name); print_float_array(fid, tmp, layer->bias, n_out);}
866     if (layer->subias != NULL) {sprintf(tmp, "%s.subias", name); print_float_array(fid, tmp, layer->subias, n_out);}
867     if (layer->weights != NULL) {sprintf(tmp, "%s.weights", name); print_int8_array(fid, tmp, layer->weights, n_total);}
868     if (layer->float_weights != NULL) {sprintf(tmp, "%s.float_weights", name); print_float_array(fid, tmp, layer->float_weights, n_total);}
869     //if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);}
870     if (layer->diag != NULL) {sprintf(tmp, "%s.diag", name); print_float_array(fid, tmp, layer->diag, n_in);}
871     if (layer->scale != NULL) {sprintf(tmp, "%s.scale", name); print_float_array(fid, tmp, layer->scale, n_out);}
872 
873 }
874 #endif
875 
/* Initialises the OSCE models (LACE and/or NoLACE, whichever are compiled in).
 *
 *   model : model struct to initialise
 *   data  : weight blob to parse, or NULL to use the built-in weight tables
 *   len   : size of the blob; 0 also selects the built-in weight tables
 *
 * Returns 0 on success and -1 on failure. With USE_WEIGHTS_FILE defined there
 * are no built-in tables, so calling without a blob fails with -1.
 *
 * Initialisation stops at the first model that reports an error.
 */
int osce_load_models(OSCEModel *model, const void *data, int len)
{
    int ret = 0;

    if (data != NULL && len)
    {
        /* init from buffer */
        WeightArray *list = NULL;

        /* Do not hand an unusable list to the init routines.
           NOTE(review): assumes parse_weights signals failure through a
           negative return value and leaves *list either NULL or owned by the
           caller - confirm against its implementation. */
        if (parse_weights(&list, data, len) < 0 || list == NULL)
        {
            free(list);
            return -1;
        }

#ifndef DISABLE_LACE
        if (ret == 0) {ret = init_lace(&model->lace, list);}
#endif

#ifndef DISABLE_NOLACE
        if (ret == 0) {ret = init_nolace(&model->nolace, list);}
#endif

        /* the list is only needed while the models are being initialised */
        free(list);
    }
    else
    {
#ifdef USE_WEIGHTS_FILE
        return -1;
#else
#ifndef DISABLE_LACE
        if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);}
#endif

#ifndef DISABLE_NOLACE
        if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);}
#endif

#endif /* USE_WEIGHTS_FILE */
    }

    /* collapse any non-zero init status to -1 */
    return ret ? -1 : 0;
}
914 
/* Applies the configured OSCE enhancement to one decoded frame, in place.
 *
 * Only frames with fs_kHz == 16 and nb_subfr == 4 are processed (320 samples,
 * i.e. 20 ms at 16 kHz); for anything else the OSCE state is reset with the
 * current method and xq is left untouched.
 *
 * Steps: compute features with osce_calculate_features(), scale xq to float
 * by 1/32768, run the selected method (OSCE_METHOD_NONE when model->loaded is
 * zero), apply the post-reset handling driven by features.reset, then scale
 * by 32768, clamp to [-32767, 32767] and write the result back to xq.
 *
 * With ENABLE_OSCE_TRAINING_DATA the decoder parameters and the unenhanced
 * signal are additionally appended to feature files in the working directory.
 */
void osce_enhance_frame(
    OSCEModel                   *model,                         /* I    OSCE model struct                           */
    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
    opus_int16                  xq[],                           /* I/O  Decoded speech                              */
    opus_int32                  num_bits,                       /* I    Size of SILK payload in bits                */
    int                         arch                            /* I    Run-time architecture                       */
)
{
    float in_buffer[320];
    float out_buffer[320];
    float features[4 * OSCE_FEATURE_DIM];
    float numbits[2];
    int periods[4];
    int i;
    int method;

    /* enhancement only implemented for 20 ms frame at 16kHz */
    if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4)
    {
        osce_reset(&psDec->osce, psDec->osce.method);
        return;
    }

    osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits);

    /* scale input */
    for (i = 0; i < 320; i++)
    {
        in_buffer[i] = ((float) xq[i]) * (1.f/32768.f);
    }

    /* without loaded weights the frame is passed through unchanged */
    if (model->loaded)
        method = psDec->osce.method;
    else
        method = OSCE_METHOD_NONE;
    switch(method)
    {
        case OSCE_METHOD_NONE:
            OPUS_COPY(out_buffer, in_buffer, 320);
            break;
#ifndef DISABLE_LACE
        case OSCE_METHOD_LACE:
            lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch);
            break;
#endif
#ifndef DISABLE_NOLACE
        case OSCE_METHOD_NOLACE:
            nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch);
            break;
#endif
        default:
            celt_assert(0 && "method not defined");
            /* If the assertion is compiled out, execution continues here:
               pass the input through so out_buffer is never read
               uninitialised further down. */
            OPUS_COPY(out_buffer, in_buffer, 320);
            break;
    }

#ifdef ENABLE_OSCE_TRAINING_DATA
    int  k;

    static FILE *flpc = NULL;
    static FILE *fgain = NULL;
    static FILE *fltp = NULL;
    static FILE *fperiod = NULL;
    static FILE *fnoisy16k = NULL;
    static FILE* f_numbits = NULL;
    static FILE* f_numbits_smooth = NULL;

    /* files are opened on first use and kept open across calls */
    if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
    if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
    if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");}
    if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");}
    if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");}
    if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
    if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}

    fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
    fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);

    for (k = 0; k < psDec->nb_subfr; k++)
    {
        float tmp;
        int16_t itmp;
        float lpc_buffer[16] = {0};
        opus_int16 *A_Q12, *B_Q14;

        (void) num_bits;
        (void) arch;

        /* gain */
        tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16);
        fwrite(&tmp, sizeof(tmp), 1, fgain);

        /* LPC: always 16 values per subframe, zero-padded above LPC_order */
        A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
        for (i = 0; i < psDec->LPC_order; i++)
        {
            lpc_buffer[i] = (float) A_Q12[i] / (1U << 12);
        }
        fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc);

        /* LTP */
        B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
        for (i = 0; i < 5; i++)
        {
            tmp = (float) B_Q14[i] / (1U << 14);
            fwrite(&tmp, sizeof(tmp), 1, fltp);
        }

        /* periods: 0 is written for frames that are not voiced */
        itmp = psDec->indices.signalType == TYPE_VOICED ? psDecCtrl->pitchL[ k ] : 0;
        fwrite(&itmp, sizeof(itmp), 1, fperiod);
    }

    /* xq still holds the unenhanced signal here; arguments are (element size, element count) */
    fwrite(xq, sizeof(xq[0]), psDec->nb_subfr * psDec->subfr_length, fnoisy16k);
#endif

    /* post-reset handling: pass-through while the counter is above 1,
       then one cross-faded frame, then regular operation */
    if (psDec->osce.features.reset > 1)
    {
        OPUS_COPY(out_buffer, in_buffer, 320);
        psDec->osce.features.reset --;
    }
    else if (psDec->osce.features.reset)
    {
        osce_cross_fade_10ms(out_buffer, in_buffer, 320);
        psDec->osce.features.reset = 0;
    }

    /* scale output */
    for (i = 0; i < 320; i++)
    {
        float tmp = 32768.f * out_buffer[i];
        if (tmp > 32767.f) tmp = 32767.f;
        if (tmp < -32767.f) tmp = -32767.f;
        xq[i] = float2int(tmp);
    }

}
1051 
1052 
1053 #if 0
1054 
1055 #include <stdio.h>
1056 
1057 void lace_feature_net_compare(
1058     const char * prefix,
1059     int num_frames,
1060     LACE* hLACE
1061 )
1062 {
1063     char in_feature_file[256];
1064     char out_feature_file[256];
1065     char numbits_file[256];
1066     char periods_file[256];
1067     char message[512];
1068     int i_frame, i_feature;
1069     float mse;
1070     float in_features[4 * LACE_NUM_FEATURES];
1071     float out_features[4 * LACE_COND_DIM];
1072     float out_features2[4 * LACE_COND_DIM];
1073     float numbits[2];
1074     int periods[4];
1075 
1076     init_lace(hLACE);
1077 
1078     FILE *f_in_features, *f_out_features, *f_numbits, *f_periods;
1079 
1080     strcpy(in_feature_file, prefix);
1081     strcat(in_feature_file, "_in_features.f32");
1082     f_in_features = fopen(in_feature_file, "rb");
1083     if (f_in_features == NULL)
1084     {
1085         sprintf(message, "could not open file %s", in_feature_file);
1086         perror(message);
1087         exit(1);
1088     }
1089 
1090     strcpy(out_feature_file, prefix);
1091     strcat(out_feature_file, "_out_features.f32");
1092     f_out_features = fopen(out_feature_file, "rb");
1093     if (f_out_features == NULL)
1094     {
1095         sprintf(message, "could not open file %s", out_feature_file);
1096         perror(message);
1097         exit(1);
1098     }
1099 
1100     strcpy(periods_file, prefix);
1101     strcat(periods_file, "_periods.s32");
1102     f_periods = fopen(periods_file, "rb");
1103     if (f_periods == NULL)
1104     {
1105         sprintf(message, "could not open file %s", periods_file);
1106         perror(message);
1107         exit(1);
1108     }
1109 
1110     strcpy(numbits_file, prefix);
1111     strcat(numbits_file, "_numbits.f32");
1112     f_numbits = fopen(numbits_file, "rb");
1113     if (f_numbits == NULL)
1114     {
1115         sprintf(message, "could not open file %s", numbits_file);
1116         perror(message);
1117         exit(1);
1118     }
1119 
1120     for (i_frame = 0; i_frame < num_frames; i_frame ++)
1121     {
1122         if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES)
1123         {
1124             fprintf(stderr, "could not read frame %d from in_features\n", i_frame);
1125             exit(1);
1126         }
1127         if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM)
1128         {
1129             fprintf(stderr, "could not read frame %d from out_features\n", i_frame);
1130             exit(1);
1131         }
1132         if(fread(periods, sizeof(int), 4, f_periods) != 4)
1133         {
1134             fprintf(stderr, "could not read frame %d from periods\n", i_frame);
1135             exit(1);
1136         }
1137         if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
1138         {
1139             fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
1140             exit(1);
1141         }
1142 
1143 
1144         lace_feature_net(hLACE, out_features2, in_features, numbits, periods);
1145 
1146         float mse = 0;
1147         for (int i = 0; i < 4 * LACE_COND_DIM; i ++)
1148         {
1149             mse += pow(out_features[i] - out_features2[i], 2);
1150         }
1151         mse /= (4 * LACE_COND_DIM);
1152         printf("rmse: %f\n", sqrt(mse));
1153 
1154     }
1155 
1156     fclose(f_in_features);
1157     fclose(f_out_features);
1158     fclose(f_numbits);
1159     fclose(f_periods);
1160 }
1161 
1162 
1163 void lace_demo(
1164     char *prefix,
1165     char *output
1166 )
1167 {
1168     char feature_file[256];
1169     char numbits_file[256];
1170     char periods_file[256];
1171     char x_in_file[256];
1172     char message[512];
1173     int i_frame;
1174     float mse;
1175     float features[4 * LACE_NUM_FEATURES];
1176     float numbits[2];
1177     int periods[4];
1178     float x_in[4 * LACE_FRAME_SIZE];
1179     int16_t x_out[4 * LACE_FRAME_SIZE];
1180     float buffer[4 * LACE_FRAME_SIZE];
1181     LACE hLACE;
1182     int frame_counter = 0;
1183     FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
1184 
1185     init_lace(&hLACE);
1186 
1187     strcpy(feature_file, prefix);
1188     strcat(feature_file, "_features.f32");
1189     f_features = fopen(feature_file, "rb");
1190     if (f_features == NULL)
1191     {
1192         sprintf(message, "could not open file %s", feature_file);
1193         perror(message);
1194         exit(1);
1195     }
1196 
1197     strcpy(x_in_file, prefix);
1198     strcat(x_in_file, "_x_in.f32");
1199     f_x_in = fopen(x_in_file, "rb");
1200     if (f_x_in == NULL)
1201     {
1202         sprintf(message, "could not open file %s", x_in_file);
1203         perror(message);
1204         exit(1);
1205     }
1206 
1207     f_x_out = fopen(output, "wb");
1208     if (f_x_out == NULL)
1209     {
1210         sprintf(message, "could not open file %s", output);
1211         perror(message);
1212         exit(1);
1213     }
1214 
1215     strcpy(periods_file, prefix);
1216     strcat(periods_file, "_periods.s32");
1217     f_periods = fopen(periods_file, "rb");
1218     if (f_periods == NULL)
1219     {
1220         sprintf(message, "could not open file %s", periods_file);
1221         perror(message);
1222         exit(1);
1223     }
1224 
1225     strcpy(numbits_file, prefix);
1226     strcat(numbits_file, "_numbits.f32");
1227     f_numbits = fopen(numbits_file, "rb");
1228     if (f_numbits == NULL)
1229     {
1230         sprintf(message, "could not open file %s", numbits_file);
1231         perror(message);
1232         exit(1);
1233     }
1234 
1235     printf("processing %s\n", prefix);
1236 
1237     while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
1238     {
1239         printf("\rframe: %d", frame_counter++);
1240         if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
1241         {
1242             fprintf(stderr, "could not read frame %d from features\n", i_frame);
1243             exit(1);
1244         }
1245         if(fread(periods, sizeof(int), 4, f_periods) != 4)
1246         {
1247             fprintf(stderr, "could not read frame %d from periods\n", i_frame);
1248             exit(1);
1249         }
1250         if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
1251         {
1252             fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
1253             exit(1);
1254         }
1255 
1256         lace_process_20ms_frame(
1257             &hLACE,
1258             buffer,
1259             x_in,
1260             features,
1261             numbits,
1262             periods
1263         );
1264 
1265         for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
1266         {
1267             float tmp = (1UL<<15) * buffer[n];
1268             tmp = CLIP(tmp, -32768, 32767);
1269             x_out[n] = (int16_t) round(tmp);
1270         }
1271 
1272         fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
1273     }
1274     printf("\ndone!\n");
1275 
1276     fclose(f_features);
1277     fclose(f_numbits);
1278     fclose(f_periods);
1279     fclose(f_x_in);
1280     fclose(f_x_out);
1281 }
1282 
1283 void nolace_demo(
1284     char *prefix,
1285     char *output
1286 )
1287 {
1288     char feature_file[256];
1289     char numbits_file[256];
1290     char periods_file[256];
1291     char x_in_file[256];
1292     char message[512];
1293     int i_frame;
1294     float mse;
1295     float features[4 * LACE_NUM_FEATURES];
1296     float numbits[2];
1297     int periods[4];
1298     float x_in[4 * LACE_FRAME_SIZE];
1299     int16_t x_out[4 * LACE_FRAME_SIZE];
1300     float buffer[4 * LACE_FRAME_SIZE];
1301     NoLACE hNoLACE;
1302     int frame_counter = 0;
1303     FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
1304 
1305     init_nolace(&hNoLACE);
1306 
1307     strcpy(feature_file, prefix);
1308     strcat(feature_file, "_features.f32");
1309     f_features = fopen(feature_file, "rb");
1310     if (f_features == NULL)
1311     {
1312         sprintf(message, "could not open file %s", feature_file);
1313         perror(message);
1314         exit(1);
1315     }
1316 
1317     strcpy(x_in_file, prefix);
1318     strcat(x_in_file, "_x_in.f32");
1319     f_x_in = fopen(x_in_file, "rb");
1320     if (f_x_in == NULL)
1321     {
1322         sprintf(message, "could not open file %s", x_in_file);
1323         perror(message);
1324         exit(1);
1325     }
1326 
1327     f_x_out = fopen(output, "wb");
1328     if (f_x_out == NULL)
1329     {
1330         sprintf(message, "could not open file %s", output);
1331         perror(message);
1332         exit(1);
1333     }
1334 
1335     strcpy(periods_file, prefix);
1336     strcat(periods_file, "_periods.s32");
1337     f_periods = fopen(periods_file, "rb");
1338     if (f_periods == NULL)
1339     {
1340         sprintf(message, "could not open file %s", periods_file);
1341         perror(message);
1342         exit(1);
1343     }
1344 
1345     strcpy(numbits_file, prefix);
1346     strcat(numbits_file, "_numbits.f32");
1347     f_numbits = fopen(numbits_file, "rb");
1348     if (f_numbits == NULL)
1349     {
1350         sprintf(message, "could not open file %s", numbits_file);
1351         perror(message);
1352         exit(1);
1353     }
1354 
1355     printf("processing %s\n", prefix);
1356 
1357     while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
1358     {
1359         printf("\rframe: %d", frame_counter++);
1360         if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
1361         {
1362             fprintf(stderr, "could not read frame %d from features\n", i_frame);
1363             exit(1);
1364         }
1365         if(fread(periods, sizeof(int), 4, f_periods) != 4)
1366         {
1367             fprintf(stderr, "could not read frame %d from periods\n", i_frame);
1368             exit(1);
1369         }
1370         if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
1371         {
1372             fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
1373             exit(1);
1374         }
1375 
1376         nolace_process_20ms_frame(
1377             &hNoLACE,
1378             buffer,
1379             x_in,
1380             features,
1381             numbits,
1382             periods
1383         );
1384 
1385         for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
1386         {
1387             float tmp = (1UL<<15) * buffer[n];
1388             tmp = CLIP(tmp, -32768, 32767);
1389             x_out[n] = (int16_t) round(tmp);
1390         }
1391 
1392         fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
1393     }
1394     printf("\ndone!\n");
1395 
1396     fclose(f_features);
1397     fclose(f_numbits);
1398     fclose(f_periods);
1399     fclose(f_x_in);
1400     fclose(f_x_out);
1401 }
1402 
1403 
/* Entry point of the stand-alone test build (see the gcc command at the end
 * of the file). Currently runs the NoLACE demo only; the LACE checks are
 * kept for reference but disabled. */
int main(void)
{
#if 0
    LACE hLACE;

    lace_feature_net_compare("testvec2/lace", 5, &hLACE);

    lace_demo("testdata/test9", "out_lace_c_9kbps.pcm");
    lace_demo("testdata/test6", "out_lace_c_6kbps.pcm");
#endif
    nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm");

    return 0;
}
1417 #endif
1418 
1419 /*gcc  -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/
1420