'''Copyright (c) 2017-2018 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

""" helper functions for dumping some Keras layers to C files """

import numpy as np


def printVector(f, vector, name, dtype='float', dotp=False, static=True):
    """ prints a vector as a one-dimensional C array """
    if dotp:
        # Reorder the weights into the block layout expected by the DOT_PROD
        # (quantized dot-product) kernels in the C code.
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))
    v = np.reshape(vector, (-1))
    if static:
        f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    else:
        f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
        if i != len(v)-1:
            f.write(',')
        else:
            break
        if i % 8 == 7:
            f.write("\n ")
        else:
            f.write(" ")
    f.write('\n};\n\n')
    return vector

def printSparseVector(f, A, name, have_diag=True):
    """ prints a block-sparse matrix as C arrays: quantized weights, block indices
        and, optionally, the diagonals of the three gate sub-matrices """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))
    if have_diag:
        # Dump the diagonals of the three N x N gate sub-matrices separately,
        # then remove them from A so only off-diagonal blocks are stored sparsely.
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    # Quantize the remaining weights to signed 8 bits (scaled by 128, clamped to [-128, 127]).
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                # Keep this 4x8 block: record its row offset and append its values.
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ

def dump_sparse_gru(self, f, hf):
    """ dumps a GRU layer with block-sparse recurrent weights as a SparseGRULayer C struct """
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')
    # Recurrent sub-bias: subtract the column sums of the quantized recurrent
    # weights (scaled by 1/128) from the recurrent part of the bias.
    subias = weights[-1].copy()
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const SparseGRULayer {};\n\n'.format(name))
    return max_rnn_neurons

def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
    """ dumps a GRU layer as a GRULayer C struct, optionally with sparse input
        weights and/or quantized (DOT_PROD) recurrent weights """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    # Keras GRU weights: [input kernel, recurrent kernel, bias].
    weights = self.get_weights()
    if sparse:
        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
    else:
        qweight = printVector(f, weights[0], name + '_weights')

    if dotp:
        f.write('#ifdef DOT_PROD\n')
        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
        f.write('#else /*DOT_PROD*/\n')
    else:
        qweight2 = weights[1]

    printVector(f, weights[1], name + '_recurrent_weights')
    if dotp:
        f.write('#endif /*DOT_PROD*/\n')

    printVector(f, weights[-1], name + '_bias')
    # Sub-bias: subtract the column sums of the (quantized) input and recurrent
    # weights, scaled by 1/128, from the corresponding rows of the bias.
    subias = weights[-1].copy()
    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    if hasattr(self, 'activation'):
        activation = self.activation.__name__.upper()
    else:
        activation = 'TANH'
    if hasattr(self, 'reset_after') and not self.reset_after:
        reset_after = 0
    else:
        reset_after = 1
    neurons = weights[0].shape[1]//3
    max_rnn_neurons = neurons
    f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {},\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
    hf.write('extern const GRULayer {};\n\n'.format(name))
    return max_rnn_neurons

def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """ dumps an already-extracted weight matrix and bias as a DenseLayer C struct """
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
    hf.write('extern const DenseLayer {};\n\n'.format(name))

def dump_dense_layer(self, f, hf):
    """ dumps a Keras Dense layer as a DenseLayer C struct """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    activation = self.activation.__name__.upper()
    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
    return False

def dump_conv1d_layer(self, f, hf):
    """ dumps a Keras Conv1D layer as a Conv1DLayer C struct """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    # Keras Conv1D kernel has shape (kernel_size, input_channels, filters).
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    hf.write('extern const Conv1DLayer {};\n\n'.format(name))
    return max_conv_inputs
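
# A minimal usage sketch (not part of the original module): dump a small dense
# layer to stdout to show the C arrays and struct these helpers generate.  The
# layer name, weights and activation below are arbitrary placeholders; the
# GRU/Conv1D helpers additionally expect a Keras layer object as their first
# argument, which this Keras-free demo avoids by calling the _impl variant.
if __name__ == '__main__':
    import sys
    demo_weights = np.arange(12, dtype='float32').reshape((3, 4)) / 16.
    demo_bias = np.zeros((4,), dtype='float32')
    # Writes demo_dense_weights/demo_dense_bias arrays, a DenseLayer struct and
    # the matching #define/extern declarations, all to stdout.
    dump_dense_layer_impl('demo_dense', demo_weights, demo_bias, 'TANH',
                          sys.stdout, sys.stdout)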