#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

# Run a trained RDO-VAE model on a feature file: encode the features, quantize
# the latent symbols and the decoder state, decode, and dump the intermediate
# tensors as raw float32 files.

import argparse

parser = argparse.ArgumentParser(description='Run an RDO-VAE model on a feature file and dump its outputs')

parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='prefix for the dumped .f32 output files')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='model python definition (without .py)')
parser.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')


args = parser.parse_args()

import importlib
rdovae = importlib.import_module(args.model)

from rdovae import apply_dead_zone, pvq_quantize

import numpy as np
import tensorflow as tf

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16

feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length

# The feature file is a flat float32 stream with nb_features values per frame.
# Group the frames into sequences of seq_length frames and truncate the
# sequence count to a multiple of the batch size so the encoder only sees
# full batches.
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]

features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
# Keep only the features the model uses (drop the trailing LPC coefficients).
features = features[:, :, :nb_used_features]
#features = np.random.randn(73600, 1000, 17)

# Run the encoder to get the unquantized latent symbols and the decoder GRU
# states, then dump the states.
bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
gru_state_dec.astype('float32').tofile(args.output + "-state.f32")
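
# Everything below simulates what happens at a fixed rate-distortion trade-off
# (lambda): the latent symbols are scaled, dead-zoned and rounded, the decoder
# GRU state is quantized, and the decoder is run on the quantized data. The
# input features and the decoder output are both written out as raw float32
# files (presumably for offline comparison).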

#dist = rdovae.feat_dist_loss(features, quant_out)
#rate = rdovae.sq1_rate_loss(features, model_bits)
#rate2 = rdovae.sq_rate_metric(features, model_bits)
#print(dist, rate, rate2)

print("shapes are:")
print(bits.shape)
print(gru_state_dec.shape)

# Dump the input features and the unquantized latent symbols.
features.astype('float32').tofile(args.output + "-input.f32")
#quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")
nbits = 80
bits.astype('float32').tofile(args.output + "-syms.f32")

# Pick a fixed rate-distortion trade-off (lambda), map it to a quantizer id,
# and look up the per-symbol quantization scale and dead zone from the
# quantizer embedding.
lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:, :, 0]
quant_embed = qembedding(quant_id)
quant_scale = tf.math.softplus(quant_embed[:, :, :nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits:2*nbits])

# Hard-quantize the symbols: scale, apply the dead zone, round to integers,
# then undo the scaling.
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale

# PVQ-quantize the decoder GRU states and keep only the state at the last
# time step of each sequence.
gru_state_dec = pvq_quantize(gru_state_dec, 82)
#gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
gru_state_dec = gru_state_dec[:, -1, :]

# Decode from every other quantized symbol vector and the quantized final state.
dec_out = decoder([bits[:, 1::2, :], gru_state_dec])

print(dec_out.shape)

dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")
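
# Illustrative invocation (the file names below are placeholders, not files
# shipped with the repository):
#
#   python3 <this script> features.f32 out --weights rdovae_model.h5 --batch-size 32
#
# The dumped files are raw float32; for example, the symbols can be read back
# with numpy as:
#
#   syms = np.fromfile("out-syms.f32", dtype="float32").reshape(-1, 80)
#
# where 80 matches the nb_bits value used to build the model above.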