#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

# Train an RDO-VAE feature quantization model
import tensorflow as tf
strategy = tf.distribute.MultiWorkerMirroredStrategy()


import argparse

parser = argparse.ArgumentParser(description='Train a quantization model')

parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='rdovae model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')


args = parser.parse_args()

# Import the model definition module named by --model (rdovae by default)
import importlib
rdovae = importlib.import_module(args.model)

import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Uncomment to cap GPU memory usage instead of letting TF grab the whole card
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
#    try:
#        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
#    except RuntimeError as e:
#        print(e)

nb_epochs = args.epochs

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

quantize = args.quantize is not None
retrain = args.retrain is not None

# Quantization fine-tuning starts from existing weights and uses a small,
# constant learning rate; training from scratch uses a larger, decayed one
if quantize:
    lr = 0.00003
    decay = 0
    input_model = args.quantize
else:
    lr = 0.001
    decay = 2.5e-5

if args.lr is not None:
    lr = args.lr

if args.decay is not None:
    decay = args.decay

if retrain:
    input_model = args.retrain

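# A note on the schedule, assuming the legacy tf.keras optimizer behavior
# that this script's `decay` argument relies on: Adam scales its step size as
#     lr / (1 + decay * iterations)
# so the default decay of 2.5e-5 roughly halves the learning rate after
# 40000 weight updates.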
opt = Adam(lr, decay=decay, beta_2=0.99)

with strategy.scope():
    model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
    model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits': rdovae.sq_rate_metric})
    model.summary()

lpc_order = 16

feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features
sequence_size = args.seq_length

# Memory-map the float32 feature file and keep only as many sequences as
# fill a whole number of batches
features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]

features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
print(features.shape)
# Drop the trailing LPC coefficients; only the first nb_used_features
# columns feed the model
features = features[:, :, :nb_used_features]

# Draw one random quantizer id per sequence (repeated over time at half the
# frame rate) and derive the matching rate-distortion lambda from it
#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
#quant_id = quant_id[:,:,0]
quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
lambda_val = .0002*np.exp(quant_id/3.8)
quant_id = quant_id[:,:,0]

# Dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))

if quantize or retrain:
    # Adapt from an existing model
    model.load_weights(input_model)

model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))

callbacks = [checkpoint]

if args.logdir is not None:
    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
    callbacks.append(tensorboard_callback)

# The four targets match the four losses: two feature-distortion terms and
# two rate terms, all computed against the input features
model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
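# Example invocation (script and file names are illustrative):
#   python3 train_rdovae.py features.f32 rdovae --batch-size 128 --epochs 120
# With the default --cond-size of 1024 this writes rdovae_1024_initial.h5
# before training and one rdovae_1024_<epoch>.h5 checkpoint per epoch.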