'''Copyright (c) 2017-2018 Mozilla
Copyright (c) 2022 Amazon

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

import numpy as np

from .c_writer import CWriter


def print_vector(writer, vector, name, dtype='float', reshape_8x4=False, static=True, debug_float=False):

    if isinstance(writer, CWriter):
        f = writer.source
        binary_blob = writer.enable_binary_blob
    else:
        f = writer
        binary_blob = False

    dtype_suffix = {
        'float'       : 'float',
        'opus_int8'   : 'int8',
        'opus_uint16' : 'uint16',
        'opus_int16'  : 'int16',
        'int'         : 'int',
        'qweight'     : 'qweight'
    }

    if binary_blob:
        f.write(
f'''
#ifndef USE_WEIGHTS_FILE
'''
        )
        writer.weight_arrays.append(name)

    if reshape_8x4:
        # reorder the matrix into blocks of 4 rows x 8 columns; within each
        # block the values are laid out column by column
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))

    v = np.reshape(vector, (-1))

    if debug_float:
        f.write('#ifndef DISABLE_DEBUG_FLOAT\n')

    if binary_blob:
        f.write(
f'''
#define WEIGHTS_{name}_DEFINED
#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{dtype_suffix[dtype]}
'''
        )

    if static:
        f.write('static ')

    f.write(f'const {dtype} {name}[{len(v)}] = {{\n    ')

    for i in range(0, len(v)):

        f.write(f'{v[i]}')

        if i != len(v) - 1:
            f.write(',')
        else:
            break

        if i % 8 == 7:
            f.write("\n    ")
        else:
            f.write(" ")

    f.write('\n};\n\n')

    if debug_float:
        f.write('#endif /*DISABLE_DEBUG_FLOAT*/\n')

    if binary_blob:
        f.write(
f'''
#endif /* USE_WEIGHTS_FILE */
'''
        )

    return vector


def extract_diagonal(A):
    """ input shape is (N, k*N) """

    N, M = A.shape
    B = A.copy()
    assert M % N == 0
    k = M // N

    diags = []
    for l in range(k):
        diag = np.diag(B[:, l * N : (l+1) * N]).copy()
        B[:, l * N : (l+1) * N] -= np.diag(diag)
        diags.append(diag)

    diag = np.concatenate(diags)

    return diag, B


def quantize_weight(weight, scale):
    scale = scale + 1e-30
    Aq = np.round(weight / scale).astype('int')
    if Aq.max() > 127 or Aq.min() <= -128:
        raise ValueError("value out of bounds in quantize_weight")
    Aq = np.clip(np.round(weight / scale).astype('int'), -128, 127)
    return Aq
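
# Illustration of quantize_weight (editorial note): with scale = 1/128, a weight
# of 0.5 maps to round(0.5 * 128) = 64 and -0.25 maps to -32; weights that would
# quantize outside [-127, 127] raise a ValueError rather than being silently
# clipped.
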

def print_sparse_weight(writer, A, name, scale=1/128, have_diag=True, quantize=False):
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))

    if have_diag:
        diag, A = extract_diagonal(A)
        print_vector(writer, diag, name + '_diag')

    if quantize:
        Aq = quantize_weight(A, scale)
    else:
        Aq = A

    # extract blocks
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = Aq[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero

    if quantize:
        print_vector(writer, W, name + '_int8', reshape_8x4=False, dtype='opus_int8')
    print_vector(writer, W0, name + '_float', reshape_8x4=False, dtype='float', debug_float=quantize)
    print_vector(writer, idx, name + '_idx', reshape_8x4=False, dtype='int')

    return Aq
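
# Layout produced by print_sparse_weight (editorial note): for every group of 8
# output columns, <name>_idx stores the number of retained 4x8 blocks followed
# by the starting row of each retained block; <name>_int8 stores each retained
# block column by column, while <name>_float stores it row by row.
# Illustration: with have_diag=False, an 8x16 matrix whose only non-zero entries
# lie in the top-left 4x8 block yields idx = [1, 0, 0] (one block starting at
# row 0 in the first column group, none in the second).
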

def compute_scaling(weight):
    """ computes optimal scaling vector for weight of shape (features_in, features_out) """

    n_in, n_out = weight.shape
    assert n_in % 4 == 0 and n_out % 8 == 0

    weight_max_abs = np.max(np.abs(weight), axis=0)
    weight_max_sum = np.max(np.abs(weight[: n_in : 2] + weight[1 : n_in : 2]), axis=0)
    scale_max = weight_max_abs / 127
    scale_sum = weight_max_sum / 129

    scale = np.maximum(scale_max, scale_sum)

    return scale


def qn(string):
    if string == "NULL": return string
    else: return '"' + string + '"'


def print_linear_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : np.ndarray = None,
                       sparse : bool = False,
                       diagonal : bool = False,
                       quantize : bool = True):
    """ prints linear layer

    Parameters:
    -----------
    name : str
        layer name
    weight : np.ndarray
        weight matrix of shape (nb_inputs, nb_outputs)
    bias : np.ndarray or None
        bias vector of length nb_outputs, or None for no bias
    scale : np.ndarray or None
        If None, automatic scaling is applied. Otherwise, output channels are
        multiplied by scale (the usual broadcasting rules apply).
    sparse : bool
        if True, the weight is exported in the 4x8 block-sparse format
    diagonal : bool
        if True (and sparse), the diagonal is extracted and stored separately
    quantize : bool
        if True, an int8-quantized copy of the weight is exported as well
    """

    if len(weight.shape) != 2:
        raise ValueError('expecting 2-dim weight array in print_linear_layer')

    bias_name = "NULL" if bias is None else name + "_bias"
    subias_name = name + "_subias" if quantize else "NULL"
    scale_name = name + "_scale" if quantize else "NULL"
    idx_name = name + "_weights_idx" if sparse else "NULL"
    float_weight_name = name + "_weights_float"
    int_weight_name = name + "_weights_int8" if quantize else "NULL"
    diag_name = name + "_weights_diag" if sparse and diagonal else "NULL"

    nb_inputs, nb_outputs = weight.shape

    if scale is None and quantize:
        scale = compute_scaling(weight)

    if sparse:
        weight_q = print_sparse_weight(writer, weight, name + "_weights", scale=scale, have_diag=diagonal, quantize=quantize)
    else:
        if quantize:
            weight_q = quantize_weight(weight, scale)
            print_vector(writer, weight_q, name + "_weights_int8", dtype='opus_int8', reshape_8x4=True)

        print_vector(writer, weight, name + "_weights_float", dtype='float', reshape_8x4=False, debug_float=quantize)

    if quantize:
        # per-output correction: bias minus the column sums of the de-quantized weight
        subias = (np.zeros(nb_outputs) if bias is None else bias) - np.sum(weight_q * scale, axis=0)
        print_vector(writer, subias, name + "_subias")

        final_scale = scale / 127 * np.ones(nb_outputs)
        print_vector(writer, final_scale, name + "_scale")

    if bias is not None:
        print_vector(writer, bias, name + "_bias")

    init_call = f'linear_init(&model->{name}, arrays, {qn(bias_name)}, {qn(subias_name)}, {qn(int_weight_name)},' \
        + f'{qn(float_weight_name)}, {qn(idx_name)}, {qn(diag_name)}, {qn(scale_name)}, {nb_inputs}, {nb_outputs})'

    writer.layer_dict[name] = ('LinearLayer', init_call)


def print_dense_layer(writer : CWriter,
                      name : str,
                      weight : np.ndarray,
                      bias : np.ndarray,
                      scale=1/128,
                      format : str = 'torch',
                      sparse=False,
                      diagonal=False,
                      quantize=False):

    if format == 'torch':
        weight = weight.transpose()

    print_linear_layer(writer, name, weight, bias, scale=scale, sparse=sparse, diagonal=diagonal, quantize=quantize)

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")


def print_conv1d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale=1/128,
                       format : str = 'torch',
                       quantize=False,
                       sparse=False):

    if format == "torch":
        # convert (out_channels, in_channels, kernel_size) to channels-last,
        # i.e. (kernel_size, in_channels, out_channels)
        weight = np.transpose(weight, (2, 1, 0))

    lin_weight = np.reshape(weight, (-1, weight.shape[-1]))
    print_linear_layer(writer, name, lin_weight, bias, scale=scale, sparse=sparse, diagonal=False, quantize=quantize)

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[2]}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_SIZE {weight.shape[1]}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE ({weight.shape[1]} * ({weight.shape[0] - 1}))\n")
    writer.header.write(f"\n#define {name.upper()}_DELAY {(weight.shape[0] - 1) // 2}\n") # CAVEAT: delay is not a property of the conv layer

    return weight.shape[0] * weight.shape[1]
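
# Illustration of print_conv1d_layer (editorial note): a torch Conv1d weight of
# shape (out_channels=8, in_channels=4, kernel_size=3) is flattened to a (12, 8)
# linear weight, and the generated header defines OUT_SIZE 8, IN_SIZE 4,
# STATE_SIZE (4 * (2)) and DELAY 1 for that layer.
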

def print_conv2d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : float=1/128,
                       quantize : bool=False):

    if quantize:
        print("[print_conv2d_layer] warning: quantize argument ignored")

    bias_name = name + "_bias"
    float_weight_name = name + "_weight_float"

    print_vector(writer, weight, float_weight_name)
    print_vector(writer, bias, bias_name)

    # init function
    out_channels, in_channels, ksize1, ksize2 = weight.shape
    init_call = f'conv2d_init(&model->{name}, arrays, "{bias_name}", "{float_weight_name}", {in_channels}, {out_channels}, {ksize1}, {ksize2})'

    writer.layer_dict[name] = ('Conv2dLayer', init_call)


def print_gru_layer(writer : CWriter,
                    name : str,
                    weight : np.ndarray,
                    recurrent_weight : np.ndarray,
                    bias : np.ndarray,
                    recurrent_bias : np.ndarray,
                    format : str = 'torch',
                    quantize : bool = False,
                    input_sparse : bool = False,
                    recurrent_sparse : bool = False,
                    scale=1/128,
                    recurrent_scale=1/128
                    ):

    if format == "torch":
        # change gate ordering from rzn to zrn
        N = weight.shape[0] // 3
        for x in [weight, recurrent_weight, bias, recurrent_bias]:
            if x is None:
                continue
            tmp = x[0:N].copy()
            x[0:N] = x[N:2*N]
            x[N:2*N] = tmp

        weight = weight.transpose()
        recurrent_weight = recurrent_weight.transpose()
    else:
        N = weight.shape[1] // 3

    print_linear_layer(writer, name + "_input", weight, bias, scale=scale, sparse=input_sparse, quantize=quantize)
    print_linear_layer(writer, name + "_recurrent", recurrent_weight, recurrent_bias, scale=recurrent_scale, sparse=recurrent_sparse, diagonal=recurrent_sparse, quantize=quantize)

    # wrapping it up
    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")

    return N


def print_tconv1d_layer(writer : CWriter,
                        name : str,
                        weight : np.ndarray,
                        bias : np.ndarray,
                        stride: int,
                        scale=1/128,
                        quantize=False,
                        sparse=False):

    in_channels, out_channels, kernel_size = weight.shape

    # export the transposed convolution as a linear layer mapping in_channels
    # inputs to kernel_size * out_channels outputs per input frame
    linear_weight = weight.transpose(2, 1, 0).reshape(kernel_size * out_channels, in_channels).transpose(1, 0)
    linear_bias = np.repeat(bias[np.newaxis, :], kernel_size, 0).flatten()

    print_linear_layer(writer, name, linear_weight, linear_bias, scale=scale, quantize=quantize, sparse=sparse)

    writer.header.write(f"\n#define {name.upper()}_KERNEL_SIZE {kernel_size}\n")
    writer.header.write(f"\n#define {name.upper()}_STRIDE {stride}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_CHANNELS {in_channels}\n")
    writer.header.write(f"\n#define {name.upper()}_OUT_CHANNELS {out_channels}\n")
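
if __name__ == '__main__':
    # Illustrative sketch only (editorial addition, not part of the export API):
    # print_vector and print_sparse_weight fall back to treating `writer` as a
    # plain file-like object when it is not a CWriter, so the generated C source
    # can be inspected without a full CWriter setup. The array names and shapes
    # below are arbitrary; rows/columns only need to be multiples of 4 and 8.
    # Run with `python -m <package path>` so the relative import above resolves.
    import io

    buf = io.StringIO()
    rng = np.random.default_rng(0)
    w = rng.standard_normal((8, 16))

    # dense float array
    print_vector(buf, w, 'example_weights')

    # sparse 4x8-block export of the same matrix, quantized to int8
    print_sparse_weight(buf, w, 'example_sparse', scale=compute_scaling(w),
                        have_diag=False, quantize=True)

    print(buf.getvalue())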