'''Copyright (c) 2017-2018 Mozilla
   Copyright (c) 2022 Amazon

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

import numpy as np

from .c_writer import CWriter

def print_vector(writer, vector, name, dtype='float', reshape_8x4=False, static=True, debug_float=False):
    """ Writes `vector` to the writer's C source as a const array `name` of type `dtype`.

        reshape_8x4 reorders the data into the 8x4 block layout used for the quantized weights,
        static controls the `static` storage qualifier, debug_float guards the array with
        #ifndef DISABLE_DEBUG_FLOAT, and when the writer has enable_binary_blob set the array
        is additionally registered for the USE_WEIGHTS_FILE weight-blob export. """

    if isinstance(writer, CWriter):
        f = writer.source
        binary_blob = writer.enable_binary_blob
    else:
        f = writer
        binary_blob = False

    dtype_suffix = {
        'float' : 'float',
        'opus_int8' : 'int8',
        'opus_uint16' : 'uint16',
        'opus_int16' : 'int16',
        'int' : 'int',
        'qweight': 'qweight'
    }


    if binary_blob:
        f.write(
f'''
#ifndef USE_WEIGHTS_FILE
'''
        )
        writer.weight_arrays.append(name)

    if reshape_8x4:
        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
        vector = vector.transpose((2, 0, 3, 1))

    v = np.reshape(vector, (-1))

    if debug_float:
        f.write('#ifndef DISABLE_DEBUG_FLOAT\n')
    if binary_blob:
        f.write(
f'''
#define WEIGHTS_{name}_DEFINED
#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{dtype_suffix[dtype]}
'''
        )

    if static:
        f.write('static ')

    f.write(f'const {dtype} {name}[{len(v)}] = {{\n    ')

    for i in range(0, len(v)):

        f.write(f'{v[i]}')

        if (i!=len(v)-1):
            f.write(',')
        else:
            break

        if (i%8==7):
            f.write("\n    ")
        else:
            f.write(" ")

    f.write('\n};\n\n')
    if debug_float: f.write('#endif /*DISABLE_DEBUG_FLOAT*/\n')

    if binary_blob:
        f.write(
f'''
#endif /* USE_WEIGHTS_FILE */
'''
        )

    return vector

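# Illustrative example (a sketch, assuming a CWriter instance `writer` is available):
#
#     print_vector(writer, np.arange(4, dtype=np.float32), "test_vec")
#
# would emit into writer.source roughly:
#
#     static const float test_vec[4] = {
#         0.0, 1.0, 2.0, 3.0
#     };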


def extract_diagonal(A):
    """ Splits A of shape (N, k*N) into the concatenated diagonals of its k square N x N
        blocks and a copy of A in which those block diagonals have been zeroed out. """

    N, M = A.shape
    B = A.copy()
    assert M % N == 0
    k = M // N

    diags = []
    for l in range(k):
        diag = np.diag(B[:, l * N : (l+1) * N]).copy()
        B[:, l * N : (l+1) * N] -= np.diag(diag)
        diags.append(diag)

    diag = np.concatenate(diags)

    return diag, B

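# Sketch of the intended behaviour (shapes are illustrative):
#
#     A = np.arange(16.0).reshape(2, 8)        # N = 2, k = 4
#     diag, B = extract_diagonal(A)
#     # diag has shape (8,): the concatenation of the four 2x2 block diagonals
#     # B equals A except that those diagonal entries are now 0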
def quantize_weight(weight, scale):
    scale = scale + 1e-30
    Aq = np.round(weight / scale).astype('int')
    # -128 is rejected as well, keeping the quantized range symmetric at [-127, 127]
    if Aq.max() > 127 or Aq.min() <= -128:
        raise ValueError("value out of bounds in quantize_weight")
    Aq = np.clip(np.round(weight / scale).astype('int'), -128, 127)
    return Aq

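# Worked example: with scale = 1/128 a weight of 0.5 quantizes to round(0.5 * 128) = 64;
# any weight whose quantized value would fall outside [-127, 127] triggers the ValueError above.
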

def print_sparse_weight(writer, A, name, scale=1/128, have_diag=True, quantize=False):
    """ Writes A in the 4x8 block-sparse format: with have_diag the block-wise diagonal is
        split off into <name>_diag first; the remaining nonzero 4x8 blocks are written to
        <name>_float (and, when quantizing, in transposed order to <name>_int8), and
        <name>_idx stores, for each group of 8 columns, the number of nonzero blocks
        followed by their row offsets. """
    N = A.shape[0]
    M = A.shape[1]
    W = np.zeros((0,), dtype='int')
    W0 = np.zeros((0,))

    if have_diag:
        diag, A = extract_diagonal(A)
        print_vector(writer, diag, name + '_diag')

    if quantize:
        Aq = quantize_weight(A, scale)
    else:
        Aq = A

    # extract blocks
    idx = np.zeros((0,), dtype='int')
    for i in range(M//8):
        pos = idx.shape[0]
        idx = np.append(idx, -1)
        nb_nonzero = 0
        for j in range(N//4):
            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
            qblock = Aq[j*4:(j+1)*4, i*8:(i+1)*8]
            if np.sum(np.abs(block)) > 1e-10:
                nb_nonzero = nb_nonzero + 1
                idx = np.append(idx, j*4)
                vblock = qblock.transpose((1,0)).reshape((-1,))
                W0 = np.concatenate([W0, block.reshape((-1,))])
                W = np.concatenate([W, vblock])
        idx[pos] = nb_nonzero

    if quantize: print_vector(writer, W, name + '_int8', reshape_8x4=False, dtype='opus_int8')
    print_vector(writer, W0, name + '_float', reshape_8x4=False, dtype='float', debug_float=quantize)
    print_vector(writer, idx, name + '_idx', reshape_8x4=False, dtype='int')

    return Aq

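# Index layout sketch: if, for one group of 8 columns, the nonzero 4-row blocks start at rows
# 0 and 8, the entries appended to <name>_idx are [2, 0, 8] (block count first, then row offsets).
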


def compute_scaling(weight):
    """ computes optimal scaling vector for weight of shape (features_in, features_out) """

    n_in, n_out = weight.shape
    assert n_in % 4 == 0 and n_out % 8 == 0

    weight_max_abs = np.max(np.abs(weight), axis=0)
    weight_max_sum = np.max(np.abs(weight[: n_in : 2] + weight[1 : n_in : 2]), axis=0)
    scale_max = weight_max_abs / 127
    scale_sum = weight_max_sum / 129

    scale = np.maximum(scale_max, scale_sum)

    return scale

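# Scaling rationale, as implemented above: scale_max keeps every quantized weight within
# +/-127, and scale_sum additionally bounds sums of adjacent input-pair weights by 129
# (presumably to protect SIMD kernels that accumulate pairs of 8-bit products in 16-bit
# lanes -- that interpretation is an assumption, not stated in this file).
# Example: an output channel with max |w| = 0.5 and max adjacent-pair sum 0.9 gets
# scale = max(0.5 / 127, 0.9 / 129) ~= 0.007.
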
def qn(string):
    if string == "NULL": return string
    else: return '"' + string + '"'

def print_linear_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : np.ndarray = None,
                       sparse : bool = False,
                       diagonal : bool = False,
                       quantize : bool = True):

    """ prints linear layer

    Parameters:
    -----------
    name : str
        layer name
    weight: np.ndarray
    ...
    scale: np.ndarray or None
        If None, automatic scaling is applied. Otherwise, output channels are multiplied by scale (the usual broadcasting rules apply).


    """

    if len(weight.shape) != 2:
        raise ValueError('expecting 2-dim weight array in print_linear_layer')


    bias_name           = "NULL" if bias is None else name + "_bias"
    subias_name         = name + "_subias" if quantize else "NULL"
    scale_name          = name + "_scale" if quantize else "NULL"
    idx_name            = name + "_weights_idx" if sparse else "NULL"
    float_weight_name   = name + "_weights_float"
    int_weight_name     = name + "_weights_int8" if quantize else "NULL"
    diag_name           = name + "_weights_diag" if sparse and diagonal else "NULL"

    nb_inputs, nb_outputs = weight.shape

    if scale is None and quantize:
        scale = compute_scaling(weight)


    if sparse:
        weight_q = print_sparse_weight(writer, weight, name + "_weights", scale=scale, have_diag=diagonal, quantize=quantize)
    else:
        if quantize:
            weight_q = quantize_weight(weight, scale)
            print_vector(writer, weight_q, name + "_weights_int8", dtype='opus_int8', reshape_8x4=True)

        print_vector(writer, weight, name + "_weights_float", dtype='float', reshape_8x4=False, debug_float=quantize)

    if quantize:
        subias = (np.zeros(nb_outputs) if bias is None else bias) - np.sum(weight_q * scale, axis=0)
        print_vector(writer, subias, name + "_subias")

        final_scale = scale / 127 * np.ones(nb_outputs)
        print_vector(writer, final_scale, name + "_scale")

    if bias is not None:
        print_vector(writer, bias, name + "_bias")


    init_call = f'linear_init(&model->{name}, arrays, {qn(bias_name)}, {qn(subias_name)}, {qn(int_weight_name)},' \
        + f'{qn(float_weight_name)}, {qn(idx_name)}, {qn(diag_name)}, {qn(scale_name)}, {nb_inputs}, {nb_outputs})'

    writer.layer_dict[name] = ('LinearLayer', init_call)

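# Illustrative usage (a sketch; `writer` and the tensors below are hypothetical):
#
#     w = np.random.randn(128, 64).astype(np.float32)   # (nb_inputs, nb_outputs)
#     b = np.zeros(64, dtype=np.float32)
#     print_linear_layer(writer, "my_dense", w, b, quantize=True)
#
# This emits my_dense_weights_int8 / _weights_float / _subias / _scale / _bias and records a
# linear_init(...) call in writer.layer_dict. The subias term (bias minus the column sums of
# the dequantized weights) appears to compensate for the offset of the unsigned 8-bit input
# representation used by the C kernels -- an inference from the arithmetic here, not something
# stated in this file.
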

def print_dense_layer(writer : CWriter,
                      name : str,
                      weight : np.ndarray,
                      bias : np.ndarray,
                      scale=1/128,
                      format : str = 'torch',
                      sparse=False,
                      diagonal=False,
                      quantize=False):

    if format == 'torch':
        weight = weight.transpose()

    print_linear_layer(writer, name, weight, bias, scale=scale, sparse=sparse, diagonal=diagonal, quantize=quantize)

    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[1]}\n")


def print_conv1d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale=1/128,
                       format : str = 'torch',
                       quantize=False,
                       sparse=False):


    if format == "torch":
        # convert (out_channels, in_channels, kernel_size) to channels-last (kernel_size, in_channels, out_channels)
        weight = np.transpose(weight, (2, 1, 0))

    lin_weight = np.reshape(weight, (-1, weight.shape[-1]))
    print_linear_layer(writer, name, lin_weight, bias, scale=scale, sparse=sparse, diagonal=False, quantize=quantize)


    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {weight.shape[2]}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_SIZE {weight.shape[1]}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE ({weight.shape[1]} * ({weight.shape[0] - 1}))\n")
    writer.header.write(f"\n#define {name.upper()}_DELAY {(weight.shape[0] - 1) // 2}\n") # CAVE: delay is not a property of the conv layer

    return weight.shape[0] * weight.shape[1]

def print_conv2d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       scale : float=1/128,
                       quantize : bool=False):

    if quantize:
        print("[print_conv2d_layer] warning: quantize argument ignored")

    bias_name = name + "_bias"
    float_weight_name = name + "_weight_float"

    print_vector(writer, weight, float_weight_name)
    print_vector(writer, bias, bias_name)

    # init function
    out_channels, in_channels, ksize1, ksize2 = weight.shape
    init_call = f'conv2d_init(&model->{name}, arrays, "{bias_name}", "{float_weight_name}", {in_channels}, {out_channels}, {ksize1}, {ksize2})'

    writer.layer_dict[name] = ('Conv2dLayer', init_call)



def print_gru_layer(writer : CWriter,
                    name : str,
                    weight : np.ndarray,
                    recurrent_weight : np.ndarray,
                    bias : np.ndarray,
                    recurrent_bias : np.ndarray,
                    format : str = 'torch',
                    quantize : bool = False,
                    input_sparse : bool = False,
                    recurrent_sparse : bool = False,
                    scale=1/128,
                    recurrent_scale=1/128
                    ):

    if format == "torch":
        # change gate ordering from torch's (r, z, n) to the (z, r, n) order expected by the C code

        N = weight.shape[0] // 3
        for x in [weight, recurrent_weight, bias, recurrent_bias]:
            if x is None: continue
            tmp = x[0:N].copy()
            x[0:N] = x[N:2*N]
            x[N:2*N] = tmp

        weight = weight.transpose()
        recurrent_weight = recurrent_weight.transpose()
    else:
        N = weight.shape[1] // 3

    print_linear_layer(writer, name + "_input", weight, bias, scale=scale, sparse=input_sparse, quantize=quantize)
    print_linear_layer(writer, name + "_recurrent", recurrent_weight, recurrent_bias, scale=recurrent_scale, sparse=recurrent_sparse, diagonal=recurrent_sparse, quantize=quantize)

    # wrapping it up
    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")

    return N

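# Illustrative usage (a sketch; `gru` stands for a hypothetical torch.nn.GRU instance):
#
#     print_gru_layer(writer, "dec_gru",
#                     gru.weight_ih_l0.detach().numpy(), gru.weight_hh_l0.detach().numpy(),
#                     gru.bias_ih_l0.detach().numpy(), gru.bias_hh_l0.detach().numpy(),
#                     quantize=True)
#
# With format='torch' the weights arrive as (3*N, input_size); after the gate swap and the
# transpose they are exported as two linear layers named "<name>_input" and "<name>_recurrent".
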

def print_tconv1d_layer(writer : CWriter,
                       name : str,
                       weight : np.ndarray,
                       bias : np.ndarray,
                       stride: int,
                       scale=1/128,
                       quantize=False,
                       sparse=False):

    in_channels, out_channels, kernel_size = weight.shape


    linear_weight = weight.transpose(2, 1, 0).reshape(kernel_size * out_channels, in_channels).transpose(1, 0)
    linear_bias = np.repeat(bias[np.newaxis, :], kernel_size, 0).flatten()

    print_linear_layer(writer, name, linear_weight, linear_bias, scale=scale, quantize=quantize, sparse=sparse)

    writer.header.write(f"\n#define {name.upper()}_KERNEL_SIZE {kernel_size}\n")
    writer.header.write(f"\n#define {name.upper()}_STRIDE {stride}\n")
    writer.header.write(f"\n#define {name.upper()}_IN_CHANNELS {in_channels}\n")
    writer.header.write(f"\n#define {name.upper()}_OUT_CHANNELS {out_channels}\n")
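
# Note on the reshaping above (an interpretation of the code, not stated in this file):
# a torch ConvTranspose1d weight has shape (in_channels, out_channels, kernel_size); it is
# rearranged into a single (in_channels, kernel_size * out_channels) matrix so that one matrix
# product per input frame yields all kernel_size output frames, which the C side can then
# place and overlap at the configured stride.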