xref: /aosp_15_r20/external/libopus/dnn/training_tf2/test_lpcnet.py (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
#!/usr/bin/python3
'''Copyright (c) 2018 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import argparse
import sys

import h5py
import numpy as np

import lpcnet
from ulaw import ulaw2lin, lin2ulaw
35
36
# Command line: <model-file> <feature-file> <output-file> [--lpc-gamma GAMMA]
parser = argparse.ArgumentParser()
# argparse does not turn '-' into '_' for the dest of a positional argument,
# so the positionals are named with underscores (making args.model_file etc.
# valid attributes) and keep their dashed spelling in the usage via metavar.
parser.add_argument('model_file', metavar='model-file', type=str, help='model weight h5 file')
# The feature and output paths are declared here so that parse_args() accepts
# them and so that their position does not depend on where --lpc-gamma appears.
parser.add_argument('feature_file', metavar='feature-file', type=str, help='input feature file (raw float32 values)')
parser.add_argument('out_file', metavar='output-file', type=str, help='output file for the synthesized 16-bit samples')
parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. WARNING: giving an inconsistent value here will severely degrade performance', default=1)

args = parser.parse_args()

filename = args.model_file
# Read the layer sizes back from the stored weights so the network built below
# has the same dimensions as the one that produced the weight file.
with h5py.File(filename, "r") as f:
    weights = f['model_weights']
    units = min(weights['gru_a']['gru_a']['recurrent_kernel:0'].shape)
    units2 = min(weights['gru_b']['gru_b']['recurrent_kernel:0'].shape)
    cond_size = min(weights['feature_dense1']['feature_dense1']['kernel:0'].shape)
    # The presence of an 'rc2lpc' entry selects the end-to-end variant.
    e2e = 'rc2lpc' in weights


model, enc, dec = lpcnet.new_lpcnet_model(training = False, rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size, batch_size=1)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()


feature_file = args.feature_file
out_file = args.out_file
frame_size = model.frame_size
# Number of float32 values stored per feature row in the input file.
nb_features = 36
nb_used_features = model.nb_used_features

# Load the raw float32 stream and view it as rows of nb_features values.
features = np.fromfile(feature_file, dtype='float32')
# np.resize() has no "-1 means infer this dimension" convention (NumPy rejects
# negative sizes there), so drop any trailing partial row explicitly and let
# reshape() infer the row count.
usable = (features.size // nb_features) * nb_features
features = np.reshape(features[:usable], (-1, nb_features))
# The whole file is processed as a single sequence.
nb_frames = 1
feature_chunk_size = features.shape[0]
pcm_chunk_size = frame_size*feature_chunk_size

features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
# Integer index derived from feature column 18 (presumably the pitch period -
# confirm against the feature extractor); passed to the encoder with the features.
periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')
71
72
73
# Restore the trained weights into the freshly built model.
model.load_weights(filename)

# Number of prediction coefficients taken from the end of each feature row.
order = 16

# Synthesized signal, one slot per output sample, initially all zero.
pcm = np.zeros(nb_frames * pcm_chunk_size)
# Three-slot decoder input, every slot starting at 128.
fexc = np.full((1, 1, 3), 128, dtype='int16')
# Zeroed recurrent states handed to (and returned by) the decoder.
state1 = np.zeros((1, model.rnn_units1), dtype='float32')
state2 = np.zeros((1, model.rnn_units2), dtype='float32')

# State and coefficient of the first-order recursive output filter.
coef = 0.85
mem = 0

# gamma**1 ... gamma**order, multiplied element-wise onto the coefficients.
lpc_weights = np.array([args.lpc_gamma ** power for power in range(1, order + 1)])
87
# Sample-by-sample synthesis. For every frame the encoder output conditions the
# decoder, which is run once per sample to produce a probability distribution
# over 8-bit excitation codes; a code is sampled, added to the linear
# prediction, and the filtered result is appended to the output file as int16.

# The first frame starts at sample order + 1 so that the stop index of the
# reversed history slice (n - order - 1) never goes negative, which would wrap
# around and yield an empty slice. The skipped samples stay zero in pcm and are
# not written to the output.
skip = order + 1
# The context manager guarantees the output is flushed and closed even if
# synthesis is interrupted by an exception.
with open(out_file, 'wb') as fout:
    for c in range(0, nb_frames):
        enc_inputs = [features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]]
        if not e2e:
            cfeat = enc.predict(enc_inputs)
        else:
            # The end-to-end encoder also returns the prediction coefficients.
            cfeat, lpcs = enc.predict(enc_inputs)
        for fr in range(0, feature_chunk_size):
            f = c*feature_chunk_size + fr
            # Index in pcm of the first sample of this frame.
            base = f*frame_size
            if not e2e:
                # Last `order` feature values, weighted by gamma**k.
                a = features[c, fr, nb_features-order:] * lpc_weights
            else:
                # NOTE(review): lpcs comes from a predict() call on a single
                # sequence; indexing with c is only exercised with c == 0.
                a = lpcs[c,fr]
            # Exponent used to sharpen the distribution; depends only on
            # feature column 19 of the current frame, so compute it once.
            sharpen = np.maximum(0, 1.5*features[c, fr, 19] - .5)
            for i in range(skip, frame_size):
                n = base + i
                # Prediction from the previous `order` output samples, newest first.
                pred = -sum(a*pcm[n - 1:n - order - 1:-1])
                fexc[0, 0, 1] = lin2ulaw(pred)

                p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
                #Lower the temperature for voiced frames to reduce noisiness
                p *= np.power(p, sharpen)
                p = p/(1e-18 + np.sum(p))
                #Cut off the tail of the remaining distribution
                p = np.maximum(p-0.002, 0).astype('float64')
                p = p/(1e-8 + np.sum(p))

                # Draw one excitation code from the shaped distribution.
                fexc[0, 0, 2] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
                pcm[n] = pred + ulaw2lin(fexc[0, 0, 2])
                # Feed the new sample back as decoder input for the next step.
                fexc[0, 0, 0] = lin2ulaw(pcm[n])
                # First-order recursive filter on the output (presumably
                # de-emphasis - confirm against the training pre-emphasis).
                mem = coef*mem + pcm[n]
                # Saturate instead of letting out-of-range values wrap in the
                # int16 conversion; the filter state itself is left unclipped.
                sample = np.clip(np.round(mem), -32768, 32767)
                np.array([sample], dtype='int16').tofile(fout)
            # Only the very first frame skips its leading samples.
            skip = 0
121