# Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT

import numpy as np
import os
import sys

script_dir = os.path.dirname(__file__)
sys.path.insert(1, os.path.join(script_dir, '..', 'common'))

from mfcc import MFCC, AudioPreprocessor


class Wav2LetterMFCC(MFCC):
    """Extends base MFCC class to provide Wav2Letter-specific MFCC requirements."""

    def __init__(self, mfcc_params):
        super().__init__(mfcc_params)

    def spectrum_calc(self, audio_data):
        """
        Computes the power spectrum of one windowed audio frame.

        Args:
            audio_data: the samples of a single frame; must be multipliable element-wise
                        with a window of length mfcc_params.frame_len

        Returns:
            the squared magnitude of the real FFT (of size mfcc_params.n_fft) of the windowed frame
        """
        frame_len = self.mfcc_params.frame_len
        # Dropping the last sample of a (frame_len + 1)-point Hann window yields the
        # periodic form of the window rather than the symmetric one.
        window = np.hanning(frame_len + 1)[0:frame_len]
        return np.abs(np.fft.rfft(window * audio_data, self.mfcc_params.n_fft)) ** 2

    def log_mel(self, mel_energy):
        """
        Converts mel energies to decibels and limits the dynamic range to 80 dB.

        Args:
            mel_energy: array of mel filter bank energies; it is not modified

        Returns:
            10 * log10(mel_energy + 1e-10), with every value clamped to be no lower
            than the maximum value minus 80
        """
        # Out-of-place addition: the caller's array is left untouched and integer
        # input is promoted to float instead of raising a casting error.
        mel_energy = np.asarray(mel_energy) + 1e-10  # small offset avoids log10(0)
        log_mel_energy = 10.0 * np.log10(mel_energy)
        top_db = 80.0
        return np.maximum(log_mel_energy, log_mel_energy.max() - top_db)

    def create_dct_matrix(self, num_fbank_bins, num_mfcc_feats):
        """
        Creates the Discrete Cosine Transform matrix to be used in the compute function.

        Row k, column n holds scale(k) * cos(pi / num_fbank_bins * (n + 0.5) * k), where
        scale(0) = 2 * sqrt(1 / (4 * num_fbank_bins)) and
        scale(k) = 2 * sqrt(1 / (2 * num_fbank_bins)) for k > 0.

        Args:
            num_fbank_bins: The number of filter bank bins
            num_mfcc_feats: the number of MFCC features

        Returns:
            the DCT matrix with shape (num_mfcc_feats, num_fbank_bins)
        """
        # The shape is taken from the arguments (not from self.mfcc_params) so the
        # result is always consistent with what the caller asked for.
        k = np.arange(num_mfcc_feats).reshape(-1, 1)
        n = np.arange(num_fbank_bins)
        basis = np.cos((np.pi / num_fbank_bins) * (n + 0.5) * k)

        first_row_scale = 2 * np.sqrt(1 / (4 * num_fbank_bins))
        other_rows_scale = 2 * np.sqrt(1 / (2 * num_fbank_bins))
        scale = np.where(k == 0, first_row_scale, other_rows_scale)
        return scale * basis

    def mel_norm(self, weight, right_mel, left_mel):
        """Over-riding parent class with ASR specific weight normalisation."""
        # The weight is scaled by 2 / (width of the filter after inv_mel_scale).
        # NOTE(review): the meaning of the False flag is defined by the parent's
        # inv_mel_scale, which is not visible here - confirm against the base class.
        enorm = 2.0 / (self.inv_mel_scale(right_mel, False) - self.inv_mel_scale(left_mel, False))
        return weight * enorm


class W2LAudioPreprocessor(AudioPreprocessor):
    """Extends the base AudioPreprocessor with Savitzky-Golay based MFCC derivative features."""

    def __init__(self, mfcc, model_input_size, stride):
        """
        Args:
            mfcc: the MFCC calculator, stored as self._mfcc_calc
            model_input_size: stored as self.model_input_size
            stride: stored as self.stride
        """
        self.model_input_size = model_input_size
        self.stride = stride

        # super() already binds the instance, so passing `self` again would put this
        # object into the parent's first positional parameter. Pass the MFCC calculator.
        # NOTE(review): assumes the parent constructor takes (mfcc, model_input_size, stride),
        # mirroring this signature - confirm against the base class.
        super().__init__(mfcc, model_input_size, stride)
        # Savitzky - Golay differential filters
        self.savgol_order1_coeffs = np.array([6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
                                              1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
                                              -3.33333333e-02, -5.00000000e-02, -6.66666667e-02])

        self.savgol_order2_coeffs = np.array([0.06060606, 0.01515152, -0.01731602,
                                              -0.03679654, -0.04329004, -0.03679654,
                                              -0.01731602, 0.01515152, 0.06060606])
        self._mfcc_calc = mfcc

    def mfcc_delta_calc(self, features):
        """
        Over-riding parent class with ASR specific MFCC derivative features.

        Each column of `features` is convolved with the two 9-tap Savitzky-Golay filters.
        The features and both filtered results are each passed through self._normalize
        and concatenated along axis 1.

        Args:
            features: 2D array; the filters are applied along axis 0, column by column

        Returns:
            the concatenation along axis 1 of the normalized features, first-filter
            output and second-filter output
        """
        mfcc_delta_np = np.zeros_like(features)
        mfcc_delta2_np = np.zeros_like(features)

        for i in range(features.shape[1]):
            column = features[:, i]
            # mode 'same' keeps the output as long as the column (for columns with at least 9 rows)
            mfcc_delta_np[:, i] = np.convolve(column, self.savgol_order1_coeffs, 'same')
            mfcc_delta2_np[:, i] = np.convolve(column, self.savgol_order2_coeffs, 'same')

        features = np.concatenate((self._normalize(features), self._normalize(mfcc_delta_np),
                                   self._normalize(mfcc_delta2_np)), axis=1)

        return features