/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <qnnpack/common.h>

#include <cpuinfo.h>

struct pytorch_qnnp_fp16_clamping_params {
  uint16_t scale;
  uint16_t max;
  uint16_t min;
};

struct pytorch_qnnp_fp32_clamping_params {
  float max;
  float min;
};

union pytorch_qnnp_fp32_requantization_params {
  struct {
    float* scales;
    uint8_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    float min_less_zero_point;
    float max_less_zero_point;
    float magic;
    int32_t magic_less_zero_point;
  } scalar;
  struct {
    float* scales;
    float max;
    float min;
    float magic;
    int32_t magic_less_zero_point;
  } neon;
  struct {
    float* scales;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neonv8;
  struct {
    PYTORCH_QNNP_ALIGN(16) float* scales;
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
  struct {
    PYTORCH_QNNP_ALIGN(16) float* scales;
    PYTORCH_QNNP_ALIGN(16) float min_less_zero_point[4];
    PYTORCH_QNNP_ALIGN(16) float max_less_zero_point[4];
    PYTORCH_QNNP_ALIGN(16) float magic[4];
    PYTORCH_QNNP_ALIGN(16) int32_t magic_less_zero_point[4];
  } psimd;
};

union pytorch_qnnp_precise_requantization_params {
  struct {
    uint32_t multiplier;
    uint32_t rounding_lo;
    uint32_t rounding_hi;
    uint32_t shift_less_32;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } scalar;
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neon;
  struct {
    PYTORCH_QNNP_ALIGN(16) uint32_t multiplier[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t rounding[2];
    PYTORCH_QNNP_ALIGN(16) uint32_t shift[4];
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
};

union pytorch_qnnp_q31_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) uint32_t multiplier[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t rounding[2];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_mask[4];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_threshold[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t shift[2];
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};

union pytorch_qnnp_conv_quantization_params {
  struct {
    const uint8_t* kernel_zero_points;
    int32_t input_zero_point;
    const float* requantization_scales;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    const uint8_t* kernel_zero_points;
    int16_t input_zero_point;
    const float* requantization_scales;
    int16_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    // The following four fields support nearest-ties-to-even rounding on
    // aarch32, saving a few instructions that would otherwise be needed;
    // see the illustrative note after this union.
    float vfmax;
    float vfmin;
    float vfmagic;
    int32_t vimagic;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) const uint8_t* kernel_zero_points;
    PYTORCH_QNNP_ALIGN(16) int16_t input_zero_point[8];
    const PYTORCH_QNNP_ALIGN(16) float* requantization_scales;
    PYTORCH_QNNP_ALIGN(16) int16_t output_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};
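
/*
 * Editor's illustrative note (an assumption, not taken from the original
 * header): the vfmax/vfmin/vfmagic/vimagic fields above enable the common
 * "magic number" trick for nearest-ties-to-even float-to-int rounding on
 * targets such as aarch32 NEON that lack a dedicated rounding convert
 * instruction. A scalar C sketch of the idea might look like:
 *
 *   float clamped = fminf(fmaxf(scaled, vfmin), vfmax);
 *   float biased = clamped + vfmagic;      // pushes the value into a range where
 *                                          // fp32 addition rounds to nearest-even
 *   int32_t bits;
 *   memcpy(&bits, &biased, sizeof(bits));  // reinterpret the fp32 bit pattern
 *   int32_t quantized = bits - vimagic;    // undo the bias; vimagic presumably
 *                                          // folds in the output zero point
 */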

struct pytorch_qnnp_conv_dynamic_quantization_params {
  int16_t input_zero_point;
  const uint8_t* kernel_zero_points;
  const float* multipliers;
};

union pytorch_qnnp_requantization_params {
  union pytorch_qnnp_precise_requantization_params precise;
  union pytorch_qnnp_fp32_requantization_params fp32;
  union pytorch_qnnp_q31_requantization_params q31;
};

union pytorch_qnnp_add_quantization_params {
  struct {
    int32_t zero_point_product;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t y_zero_point;
    int32_t y_max;
    int32_t y_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    uint8_t a_zero_point;
    uint8_t b_zero_point;
    int16_t y_zero_point;
    int32_t a_multiplier;
    int32_t b_multiplier;
    int32_t right_shift;
    uint8_t y_max;
    uint8_t y_min;
  } neon;
#endif
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) int32_t zero_point_product[4];
    PYTORCH_QNNP_ALIGN(16) uint16_t a_multiplier_lo[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t a_multiplier_hi[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t b_multiplier_lo[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t b_multiplier_hi[8];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_mask[4];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_threshold[4];
    PYTORCH_QNNP_ALIGN(16) int16_t y_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t y_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t y_min[16];
    uint32_t shift;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
  } sse2;
#endif
};

union pytorch_qnnp_avgpool_quantization_params {
  struct {
    int32_t bias;
    float scale;
    int32_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    int32_t bias;
    float scale;
    int16_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    // The following four fields support nearest-ties-to-even rounding on
    // aarch32, saving a few instructions that would otherwise be needed;
    // see the note after pytorch_qnnp_conv_quantization_params above.
    float vfmax;
    float vfmin;
    float vfmagic;
    int32_t vimagic;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) int32_t bias[4];
    PYTORCH_QNNP_ALIGN(16) float scale[4];
    PYTORCH_QNNP_ALIGN(16) int16_t output_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif
};

union pytorch_qnnp_u8_clamping_params {
  struct {
    int32_t output_max;
    int32_t output_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    uint8_t output_max;
    uint8_t output_min;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};

typedef void (*pytorch_q8gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    size_t output_channel_index,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

/*
  Q8 GEMM kernel with support for dynamic quantization.

  The w parameter designates weights, and is to be passed on to this kernel
  exactly as returned by the pack function.  The initial bias portion of
  this buffer will be ignored.

  The bias parameter expects max(nr, 8) floating-point biases.  Technically
  the kernels only need nr biases from the buffer pointed to by this parameter,
  but they end up reading at most 8 to keep the logic simple and fast.
  Consequently, make sure this parameter has enough storage for 8
  floating-point numbers to avoid triggering out-of-bounds errors.  The
  remaining 8 - nr biases, if any, will be unused.

  quantization_params contains the quantization parameters, namely the input
  and kernel zero points and the multipliers.  Each multiplier is expected to
  equal input_scale * kernel_scale.
*/

typedef void (*pytorch_q8gemm_dq_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);
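
/*
 * Editor's illustrative sketch (a hypothetical caller-side setup, not part of
 * the original documentation): per the comment above, the bias buffer must
 * provide storage for at least 8 floats even when nr < 8, and each multiplier
 * is the product of the input scale and the per-channel kernel scale. The
 * names real_bias, num_channels, input_scale, kernel_scales, input_zero_point,
 * and kernel_zero_points below are assumptions used only for illustration:
 *
 *   float bias_buffer[8] = {0};  // >= max(nr, 8) floats, per the contract above
 *   memcpy(bias_buffer, real_bias, nr * sizeof(float));
 *
 *   float multipliers[num_channels];
 *   for (size_t i = 0; i < num_channels; i++) {
 *     multipliers[i] = input_scale * kernel_scales[i];
 *   }
 *
 *   struct pytorch_qnnp_conv_dynamic_quantization_params qp = {
 *     .input_zero_point = input_zero_point,
 *     .kernel_zero_points = kernel_zero_points,
 *     .multipliers = multipliers,
 *   };
 */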

typedef void (*pytorch_q8gemm_dq_sparse_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a,
    size_t a_stride,
    const uint8_t* packed_w,
    const uint32_t* w_row_ptr,
    const uint32_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w32_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint32_t* w_row_ptr,
    const uint32_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w16_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint16_t* w_row_ptr,
    const uint16_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w8_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint8_t* w_row_ptr,
    const uint8_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_sparse_packA_ukernel_function)(
    const size_t mr,
    const size_t K,
    const uint8_t* a,
    const size_t a_stride,
    uint8_t* a_packed);

typedef void (*pytorch_q8conv_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const uint8_t** a,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    size_t output_channel_index,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_xzp_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const int32_t* a_sum,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    const union pytorch_qnnp_q31_requantization_params* requantization_params);

typedef void (*pytorch_q8sum_rows_ukernel_function)(
    const uint8_t* a,
    size_t m,
    size_t k,
    size_t stride,
    int32_t multiplier,
    int32_t* sums);

typedef void (*pytorch_xzipc_ukernel_function)(size_t n, const void* x, void* y);

typedef void (
    *pytorch_xzipv_ukernel_function)(size_t n, size_t m, const void* x, void* y);

typedef void (*pytorch_x8lut_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint8_t* t,
    uint8_t* y);

typedef void (*pytorch_sgemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp32_clamping_params* clamping_params);

typedef void (*pytorch_sconv_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp32_clamping_params* clamping_params);

typedef void (*pytorch_hgemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp16_clamping_params* clamping_params);

typedef void (*pytorch_q8dwconv2d_up_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8dwconv2d_mp_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    int32_t* buffer,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8dwconv3d_mp_ukernel_function)(
    size_t channels,
    size_t output_height,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    int32_t* buffer,
    uint8_t* output,
    size_t input_row_stride,
    size_t input_col_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8gavgpool_up_ukernel_function)(
    size_t m,
    size_t n,
    const uint8_t* x,
    size_t x_stride,
    const uint8_t* zero,
    uint8_t* y,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8gavgpool_mp_ukernel_function)(
    size_t m,
    size_t n,
    const uint8_t* x,
    size_t x_stride,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* y,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8avgpool_up_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    const uint8_t* zero,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8avgpool_mp_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_u8maxpool_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_u8_clamping_params* params);

typedef void (*pytorch_u8clamp_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y,
    const union pytorch_qnnp_u8_clamping_params* params);

typedef uint8_t (*pytorch_u8rmax_ukernel_function)(size_t n, const uint8_t* x);

typedef void (*pytorch_u8lut32norm_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint32_t* t,
    uint8_t* y);

typedef void (*pytorch_q8vadd_ukernel_function)(
    size_t n,
    const uint8_t* a,
    const uint8_t* b,
    uint8_t* y,
    const union pytorch_qnnp_add_quantization_params* quantization_params);

struct pytorch_q8conv_parameters {
  pytorch_q8gemm_ukernel_function gemm;
  pytorch_q8conv_ukernel_function conv;
  pytorch_q8gemm_dq_ukernel_function gemm_dq;
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
};

struct pytorch_q8gemm_sparse_parameters {
  pytorch_q8gemm_dq_sparse_ukernel_function gemm_dq;
  // w32, w16, and w8 refer to variants of the kernel that use uint32_t,
  // uint16_t, and uint8_t, respectively, for the row-pointer values and
  // column-block indices; see the note after this struct.
  pytorch_q8gemm_dq_sparse_packedA_w32_ukernel_function packedA_w32_gemm_dq;
  pytorch_q8gemm_dq_sparse_packedA_w16_ukernel_function packedA_w16_gemm_dq;
  pytorch_q8gemm_dq_sparse_packedA_w8_ukernel_function packedA_w8_gemm_dq;
  pytorch_q8gemm_sparse_packA_ukernel_function packA;
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
  uint8_t log2_mr;
  uint8_t log2_row_block_size;
  uint32_t row_block_size;
  uint32_t col_block_size;
};
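
/*
 * Editor's illustrative note (an inference from the parameter names, not a
 * statement from the original source): the sparse kernels appear to consume a
 * block-CSR style description of the weight matrix, where w_row_ptr holds
 * per-row offsets into packed_w and w_block_ids_ptr holds the column-block
 * indices of the stored blocks. The w32/w16/w8 variants differ only in the
 * integer width of those two index arrays, so a caller could, for example,
 * pick the narrowest width that still holds the largest offset/index:
 *
 *   if (max_index <= UINT8_MAX) {
 *     // use the packedA_w8_gemm_dq kernel
 *   } else if (max_index <= UINT16_MAX) {
 *     // use the packedA_w16_gemm_dq kernel
 *   } else {
 *     // use the packedA_w32_gemm_dq kernel
 *   }
 */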

struct pytorch_q8conv_xzp_parameters {
  pytorch_q8gemm_xzp_ukernel_function gemm;
  /* no conv ukernel */
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
  uint8_t kc;
  size_t kthreshold;
};

struct pytorch_q8dwconv2d_up_parameters {
  pytorch_q8dwconv2d_up_ukernel_function updw;
  pytorch_q8dwconv2d_up_ukernel_function updw_per_channel;
  uint8_t cr;
};

struct pytorch_q8dwconv2d_mp_parameters {
  pytorch_q8dwconv2d_mp_ukernel_function mpdw;
  pytorch_q8dwconv2d_mp_ukernel_function mpdw_per_channel;
  uint8_t cr;
};

struct pytorch_q8dwconv3d_mp_parameters {
  pytorch_q8dwconv3d_mp_ukernel_function mpdw;
  uint8_t cr;
};

struct pytorch_q8sum_rows_parameters {
  pytorch_q8sum_rows_ukernel_function sum_rows;
  uint32_t m;
};

struct pytorch_q8gavgpool_parameters {
  pytorch_q8gavgpool_up_ukernel_function ltnr;
  pytorch_q8gavgpool_up_ukernel_function genr_lemr;
  pytorch_q8gavgpool_mp_ukernel_function genr_gtmr;
  uint8_t mr;
  uint8_t nr;
};

struct pytorch_q8avgpool_parameters {
  pytorch_q8avgpool_up_ukernel_function ltkr;
  pytorch_q8avgpool_up_ukernel_function gekr_lemr;
  pytorch_q8avgpool_mp_ukernel_function gekr_gtmr;
  uint8_t mr;
  uint8_t qr;
  uint8_t kr;
};

struct pytorch_u8maxpool_parameters {
  pytorch_u8maxpool_ukernel_function ltkr;
  pytorch_u8maxpool_ukernel_function gekr;
  uint8_t mr;
  uint8_t qr;
  uint8_t kr;
};

struct pytorch_x8zip_parameters {
  pytorch_xzipc_ukernel_function x2;
  pytorch_xzipc_ukernel_function x3;
  pytorch_xzipc_ukernel_function x4;
  pytorch_xzipv_ukernel_function xm;
};

struct pytorch_qnnp_parameters {
  struct pytorch_q8conv_parameters q8conv;
  struct pytorch_q8gemm_sparse_parameters q8gemm_sparse_c1x4;
  struct pytorch_q8gemm_sparse_parameters q8gemm_sparse_c8x1;
  struct pytorch_q8conv_xzp_parameters q8conv_xzp;
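  /* An editorial inference, not stated in the original header: the q8dw9,
     q8dw25, and q8dw27 names appear to denote the number of kernel taps the
     depthwise paths handle (3x3, 5x5, and 3x3x3, respectively). */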
  struct pytorch_q8dwconv2d_up_parameters q8dw9;
  struct pytorch_q8dwconv2d_mp_parameters q8dw25;
  struct pytorch_q8dwconv3d_mp_parameters q8dw27;
  struct pytorch_q8sum_rows_parameters q8sum_rows;
  pytorch_q8vadd_ukernel_function q8vadd;
  struct pytorch_q8gavgpool_parameters q8gavgpool;
  struct pytorch_q8avgpool_parameters q8avgpool;
  struct pytorch_u8maxpool_parameters u8maxpool;
  pytorch_u8lut32norm_ukernel_function u8lut32norm;
  pytorch_u8clamp_ukernel_function u8clamp;
  pytorch_u8rmax_ukernel_function u8rmax;
  struct pytorch_x8zip_parameters x8zip;
  pytorch_x8lut_ukernel_function x8lut;
  bool initialized;
};

#ifdef __cplusplus
extern "C" {
#endif

extern struct pytorch_qnnp_parameters pytorch_qnnp_params;

#ifdef __cplusplus
}
#endif