/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <qnnpack/common.h>

#include <cpuinfo.h>

struct pytorch_qnnp_fp16_clamping_params {
  uint16_t scale;
  uint16_t max;
  uint16_t min;
};

struct pytorch_qnnp_fp32_clamping_params {
  float max;
  float min;
};

union pytorch_qnnp_fp32_requantization_params {
  struct {
    float* scales;
    uint8_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    float min_less_zero_point;
    float max_less_zero_point;
    float magic;
    int32_t magic_less_zero_point;
  } scalar;
  struct {
    float* scales;
    float max;
    float min;
    float magic;
    int32_t magic_less_zero_point;
  } neon;
  struct {
    float* scales;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neonv8;
  struct {
    PYTORCH_QNNP_ALIGN(16) float* scales;
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
  struct {
    PYTORCH_QNNP_ALIGN(16) float* scales;
    PYTORCH_QNNP_ALIGN(16) float min_less_zero_point[4];
    PYTORCH_QNNP_ALIGN(16) float max_less_zero_point[4];
    PYTORCH_QNNP_ALIGN(16) float magic[4];
    PYTORCH_QNNP_ALIGN(16) int32_t magic_less_zero_point[4];
  } psimd;
};
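/*
  Illustrative sketch (an interpretation of the field names above, not a
  normative description of the kernels): the scalar "magic" variant of fp32
  requantization converts an int32 accumulator acc for channel c roughly as

    float scaled = (float) acc * scales[c];
    float clamped = scaled < min_less_zero_point ? min_less_zero_point : scaled;
    clamped = clamped > max_less_zero_point ? max_less_zero_point : clamped;
    uint8_t out = (uint8_t) ((int32_t) fp32_to_bits(clamped + magic) -
                             magic_less_zero_point);

  where fp32_to_bits denotes a float-to-uint32_t bit cast. Adding magic
  (12582912.0f, i.e. 0x1.8p+23) places the rounded integer in the low mantissa
  bits, so the bit cast followed by the magic_less_zero_point subtraction
  yields the rounded value with the output zero point already folded in.
*/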
union pytorch_qnnp_precise_requantization_params {
  struct {
    uint32_t multiplier;
    uint32_t rounding_lo;
    uint32_t rounding_hi;
    uint32_t shift_less_32;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } scalar;
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neon;
  struct {
    PYTORCH_QNNP_ALIGN(16) uint32_t multiplier[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t rounding[2];
    PYTORCH_QNNP_ALIGN(16) uint32_t shift[4];
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
};

union pytorch_qnnp_q31_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t zero_point;
    uint8_t max;
    uint8_t min;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) uint32_t multiplier[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t rounding[2];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_mask[4];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_threshold[4];
    PYTORCH_QNNP_ALIGN(16) uint64_t shift[2];
    PYTORCH_QNNP_ALIGN(16) int16_t zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};

union pytorch_qnnp_conv_quantization_params {
  struct {
    const uint8_t* kernel_zero_points;
    int32_t input_zero_point;
    const float* requantization_scales;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    const uint8_t* kernel_zero_points;
    int16_t input_zero_point;
    const float* requantization_scales;
    int16_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    // The following four fields are used for nearest-ties-to-even
    // rounding on aarch32; this saves a few instructions that
    // would otherwise be needed.
    float vfmax;
    float vfmin;
    float vfmagic;
    int32_t vimagic;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) const uint8_t* kernel_zero_points;
    PYTORCH_QNNP_ALIGN(16) int16_t input_zero_point[8];
    const PYTORCH_QNNP_ALIGN(16) float* requantization_scales;
    PYTORCH_QNNP_ALIGN(16) int16_t output_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};

struct pytorch_qnnp_conv_dynamic_quantization_params {
  int16_t input_zero_point;
  const uint8_t* kernel_zero_points;
  const float* multipliers;
};
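/*
  Illustrative sketch (an assumption about how these fields combine, not a
  statement of the exact kernel arithmetic): with dynamic quantization the
  floating-point output for row i and output channel j is computed along the
  lines of

    acc = sum over k of ((int32_t) a[i][k] - input_zero_point) *
                        ((int32_t) w[k][j] - kernel_zero_points[j]);
    c[i][j] = (float) acc * multipliers[j] + bias[j];

  where multipliers[j] is expected to equal input_scale * kernel_scale[j].
*/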
union pytorch_qnnp_requantization_params {
  union pytorch_qnnp_precise_requantization_params precise;
  union pytorch_qnnp_fp32_requantization_params fp32;
  union pytorch_qnnp_q31_requantization_params q31;
};

union pytorch_qnnp_add_quantization_params {
  struct {
    int32_t zero_point_product;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t y_zero_point;
    int32_t y_max;
    int32_t y_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    uint8_t a_zero_point;
    uint8_t b_zero_point;
    int16_t y_zero_point;
    int32_t a_multiplier;
    int32_t b_multiplier;
    int32_t right_shift;
    uint8_t y_max;
    uint8_t y_min;
  } neon;
#endif
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) int32_t zero_point_product[4];
    PYTORCH_QNNP_ALIGN(16) uint16_t a_multiplier_lo[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t a_multiplier_hi[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t b_multiplier_lo[8];
    PYTORCH_QNNP_ALIGN(16) uint16_t b_multiplier_hi[8];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_mask[4];
    PYTORCH_QNNP_ALIGN(16) int32_t remainder_threshold[4];
    PYTORCH_QNNP_ALIGN(16) int16_t y_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t y_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t y_min[16];
    uint32_t shift;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
  } sse2;
#endif
};

union pytorch_qnnp_avgpool_quantization_params {
  struct {
    int32_t bias;
    float scale;
    int32_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    int32_t bias;
    float scale;
    int16_t output_zero_point;
    uint8_t output_max;
    uint8_t output_min;
    // The following four fields are used for nearest-ties-to-even
    // rounding on aarch32; this saves a few instructions that
    // would otherwise be needed.
    float vfmax;
    float vfmin;
    float vfmagic;
    int32_t vimagic;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) int32_t bias[4];
    PYTORCH_QNNP_ALIGN(16) float scale[4];
    PYTORCH_QNNP_ALIGN(16) int16_t output_zero_point[8];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif
};

union pytorch_qnnp_u8_clamping_params {
  struct {
    int32_t output_max;
    int32_t output_min;
  } scalar;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
  struct {
    uint8_t output_max;
    uint8_t output_min;
  } neon;
#endif /* CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
  struct {
    PYTORCH_QNNP_ALIGN(16) uint8_t output_max[16];
    PYTORCH_QNNP_ALIGN(16) uint8_t output_min[16];
  } sse2;
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
};

typedef void (*pytorch_q8gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    size_t output_channel_index,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

/*
  Q8 GEMM kernel with support for dynamic quantization.

  The w parameter designates weights, and is to be passed on to this kernel
  exactly as returned by the pack function. The initial bias portion of
  this buffer will be ignored.

  The bias parameter expects max(nr, 8) floating-point biases. Technically
  the kernels only need nr biases from the buffer pointed to by this
  parameter, but they end up reading at most 8 to keep the logic simple and
  fast. Consequently, make sure this parameter has enough storage for 8
  floating-point numbers to avoid triggering out-of-bounds errors. The
  remaining 8 - nr biases, if any, will be unused.

  quantization_params contains the quantization parameters, namely the input
  and kernel zero points, and the multiplier. The multiplier is expected to
  be equal to input_scale * kernel_scale.
*/

typedef void (*pytorch_q8gemm_dq_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);
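/*
  Hypothetical caller-side sketch (not part of the API) of the bias-buffer
  requirement described above, assuming nr <= 8 for the selected kernel:

    float padded_bias[8] = {0.0f};
    memcpy(padded_bias, bias, nr * sizeof(float));
    // pass padded_bias as the bias argument; the kernel only uses the first
    // nr values but may read all 8 floats.
*/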
typedef void (*pytorch_q8gemm_dq_sparse_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a,
    size_t a_stride,
    const uint8_t* packed_w,
    const uint32_t* w_row_ptr,
    const uint32_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w32_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint32_t* w_row_ptr,
    const uint32_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w16_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint16_t* w_row_ptr,
    const uint16_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_dq_sparse_packedA_w8_ukernel_function)(
    size_t mr,
    size_t nr,
    const uint8_t* a_packed,
    const uint8_t* packed_w,
    const uint8_t* w_row_ptr,
    const uint8_t* w_block_ids_ptr,
    const float* bias,
    float* c,
    size_t c_stride,
    size_t output_channel_index,
    const struct pytorch_qnnp_conv_dynamic_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_sparse_packA_ukernel_function)(
    const size_t mr,
    const size_t K,
    const uint8_t* a,
    const size_t a_stride,
    uint8_t* a_packed);
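/*
  Illustrative sketch (an interpretation of the parameter names, not a
  normative format specification): the sparse kernels consume the weight
  matrix in a CSR-like block-sparse layout of row_block_size x col_block_size
  blocks, roughly

    w_row_ptr[r]        index of the first non-zero block in block-row r
    w_block_ids_ptr[i]  block-column index of the i-th stored block
    packed_w            quantized block values, stored contiguously in the
                        order referenced by w_row_ptr / w_block_ids_ptr

  The _w32/_w16/_w8 variants differ only in the integer type used for
  w_row_ptr and w_block_ids_ptr (uint32_t, uint16_t, or uint8_t).
*/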
typedef void (*pytorch_q8conv_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const uint8_t** a,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    size_t output_channel_index,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8gemm_xzp_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const int32_t* a_sum,
    const void* w,
    uint8_t* c,
    size_t c_stride,
    const union pytorch_qnnp_q31_requantization_params* requantization_params);

typedef void (*pytorch_q8sum_rows_ukernel_function)(
    const uint8_t* a,
    size_t m,
    size_t k,
    size_t stride,
    int32_t multiplier,
    int32_t* sums);

typedef void (*pytorch_xzipc_ukernel_function)(size_t n, const void* x, void* y);

typedef void (
    *pytorch_xzipv_ukernel_function)(size_t n, size_t m, const void* x, void* y);

typedef void (*pytorch_x8lut_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint8_t* t,
    uint8_t* y);

typedef void (*pytorch_sgemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp32_clamping_params* clamping_params);

typedef void (*pytorch_sconv_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp32_clamping_params* clamping_params);

typedef void (*pytorch_hgemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t c_stride,
    const struct pytorch_qnnp_fp16_clamping_params* clamping_params);

typedef void (*pytorch_q8dwconv2d_up_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8dwconv2d_mp_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    int32_t* buffer,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8dwconv3d_mp_ukernel_function)(
    size_t channels,
    size_t output_height,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    int32_t* buffer,
    uint8_t* output,
    size_t input_row_stride,
    size_t input_col_stride,
    size_t output_increment,
    const union pytorch_qnnp_conv_quantization_params* quantization_params);

typedef void (*pytorch_q8gavgpool_up_ukernel_function)(
    size_t m,
    size_t n,
    const uint8_t* x,
    size_t x_stride,
    const uint8_t* zero,
    uint8_t* y,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8gavgpool_mp_ukernel_function)(
    size_t m,
    size_t n,
    const uint8_t* x,
    size_t x_stride,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* y,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8avgpool_up_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    const uint8_t* zero,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);

typedef void (*pytorch_q8avgpool_mp_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_avgpool_quantization_params* quantization_params);
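/*
  Note on naming (an interpretation of the signatures above, not a normative
  statement): the *_up_* ("unipass") variants complete the reduction in a
  single pass over the input, while the *_mp_* ("multipass") variants take an
  extra int32_t* buffer for intermediate accumulators and are used when the
  reduction is too large to finish in one pass.
*/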
typedef void (*pytorch_u8maxpool_ukernel_function)(
    size_t n,
    size_t ks,
    size_t kc,
    const uint8_t** x,
    uint8_t* y,
    size_t x_increment,
    size_t y_increment,
    const union pytorch_qnnp_u8_clamping_params* params);

typedef void (*pytorch_u8clamp_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y,
    const union pytorch_qnnp_u8_clamping_params* params);

typedef uint8_t (*pytorch_u8rmax_ukernel_function)(size_t n, const uint8_t* x);

typedef void (*pytorch_u8lut32norm_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint32_t* t,
    uint8_t* y);

typedef void (*pytorch_q8vadd_ukernel_function)(
    size_t n,
    const uint8_t* a,
    const uint8_t* b,
    uint8_t* y,
    const union pytorch_qnnp_add_quantization_params* quantization_params);

struct pytorch_q8conv_parameters {
  pytorch_q8gemm_ukernel_function gemm;
  pytorch_q8conv_ukernel_function conv;
  pytorch_q8gemm_dq_ukernel_function gemm_dq;
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
};

struct pytorch_q8gemm_sparse_parameters {
  pytorch_q8gemm_dq_sparse_ukernel_function gemm_dq;
  // w32, w16, and w8 refer to variants of the kernel that use uint32_t,
  // uint16_t, and uint8_t, respectively, for the row pointer values and
  // column indices.
  pytorch_q8gemm_dq_sparse_packedA_w32_ukernel_function packedA_w32_gemm_dq;
  pytorch_q8gemm_dq_sparse_packedA_w16_ukernel_function packedA_w16_gemm_dq;
  pytorch_q8gemm_dq_sparse_packedA_w8_ukernel_function packedA_w8_gemm_dq;
  pytorch_q8gemm_sparse_packA_ukernel_function packA;
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
  uint8_t log2_mr;
  uint8_t log2_row_block_size;
  uint32_t row_block_size;
  uint32_t col_block_size;
};

struct pytorch_q8conv_xzp_parameters {
  pytorch_q8gemm_xzp_ukernel_function gemm;
  /* no conv ukernel */
  uint8_t mr;
  uint8_t nr;
  uint8_t kr;
  uint8_t kc;
  size_t kthreshold;
};

struct pytorch_q8dwconv2d_up_parameters {
  pytorch_q8dwconv2d_up_ukernel_function updw;
  pytorch_q8dwconv2d_up_ukernel_function updw_per_channel;
  uint8_t cr;
};

struct pytorch_q8dwconv2d_mp_parameters {
  pytorch_q8dwconv2d_mp_ukernel_function mpdw;
  pytorch_q8dwconv2d_mp_ukernel_function mpdw_per_channel;
  uint8_t cr;
};

struct pytorch_q8dwconv3d_mp_parameters {
  pytorch_q8dwconv3d_mp_ukernel_function mpdw;
  uint8_t cr;
};

struct pytorch_q8sum_rows_parameters {
  pytorch_q8sum_rows_ukernel_function sum_rows;
  uint32_t m;
};

struct pytorch_q8gavgpool_parameters {
  pytorch_q8gavgpool_up_ukernel_function ltnr;
  pytorch_q8gavgpool_up_ukernel_function genr_lemr;
  pytorch_q8gavgpool_mp_ukernel_function genr_gtmr;
  uint8_t mr;
  uint8_t nr;
};

struct pytorch_q8avgpool_parameters {
  pytorch_q8avgpool_up_ukernel_function ltkr;
  pytorch_q8avgpool_up_ukernel_function gekr_lemr;
  pytorch_q8avgpool_mp_ukernel_function gekr_gtmr;
  uint8_t mr;
  uint8_t qr;
  uint8_t kr;
};

struct pytorch_u8maxpool_parameters {
  pytorch_u8maxpool_ukernel_function ltkr;
  pytorch_u8maxpool_ukernel_function gekr;
  uint8_t mr;
  uint8_t qr;
  uint8_t kr;
};
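/*
  Note on field naming in the pooling parameter structs above (an
  interpretation, not a normative statement): the lt/ge/le/gt prefixes appear
  to encode the size regime each selected kernel handles relative to the tile
  parameters in the same struct, e.g. ltkr is used when the channel count is
  below kr, gekr_lemr when it is at least kr and the pooling window fits
  within mr, and gekr_gtmr when the window exceeds mr and the multipass
  kernel (with its scratch buffer) is required.
*/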
struct pytorch_x8zip_parameters {
  pytorch_xzipc_ukernel_function x2;
  pytorch_xzipc_ukernel_function x3;
  pytorch_xzipc_ukernel_function x4;
  pytorch_xzipv_ukernel_function xm;
};

struct pytorch_qnnp_parameters {
  struct pytorch_q8conv_parameters q8conv;
  struct pytorch_q8gemm_sparse_parameters q8gemm_sparse_c1x4;
  struct pytorch_q8gemm_sparse_parameters q8gemm_sparse_c8x1;
  struct pytorch_q8conv_xzp_parameters q8conv_xzp;
  struct pytorch_q8dwconv2d_up_parameters q8dw9;
  struct pytorch_q8dwconv2d_mp_parameters q8dw25;
  struct pytorch_q8dwconv3d_mp_parameters q8dw27;
  struct pytorch_q8sum_rows_parameters q8sum_rows;
  pytorch_q8vadd_ukernel_function q8vadd;
  struct pytorch_q8gavgpool_parameters q8gavgpool;
  struct pytorch_q8avgpool_parameters q8avgpool;
  struct pytorch_u8maxpool_parameters u8maxpool;
  pytorch_u8lut32norm_ukernel_function u8lut32norm;
  pytorch_u8clamp_ukernel_function u8clamp;
  pytorch_u8rmax_ukernel_function u8rmax;
  struct pytorch_x8zip_parameters x8zip;
  pytorch_x8lut_ukernel_function x8lut;
  bool initialized;
};

#ifdef __cplusplus
extern "C" {
#endif

extern struct pytorch_qnnp_parameters pytorch_qnnp_params;

#ifdef __cplusplus
}
#endif
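/*
  Hypothetical caller-side sketch (not part of the public API): kernels are
  dispatched through the globally selected parameter table, so a dense q8
  GEMM microkernel call might look like

    const struct pytorch_qnnp_parameters* p = &pytorch_qnnp_params;
    p->q8conv.gemm(
        mr, nr, k, a, a_stride, packed_w, c, c_stride,
        output_channel_index, &quantization_params);

  with mr <= p->q8conv.mr and nr <= p->q8conv.nr for the selected
  architecture, and p->initialized checked beforehand.
*/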