1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker // 3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker #pragma once 7*4bdc9457SAndroid Build Coastguard Worker 8*4bdc9457SAndroid Build Coastguard Worker 9*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h> 10*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h> 11*4bdc9457SAndroid Build Coastguard Worker 12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h> 13*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h> 14*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h> 15*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/params.h> 16*4bdc9457SAndroid Build Coastguard Worker 17*4bdc9457SAndroid Build Coastguard Worker 18*4bdc9457SAndroid Build Coastguard Worker enum xnn_parallelization_type { 19*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_invalid = 0, 20*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_1d, 21*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_1d_tile_1d, 22*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_2d, 23*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_2d_tile_1d, 24*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_2d_tile_2d, 25*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_3d, 26*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_3d_tile_2d, 27*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_4d, 28*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_4d_tile_2d, 29*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_5d, 30*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_5d_tile_2d, 31*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_6d_tile_2d, 32*4bdc9457SAndroid Build Coastguard Worker #if XNN_MAX_UARCH_TYPES > 1 33*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_2d_tile_2d_with_uarch, 34*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_3d_tile_2d_with_uarch, 35*4bdc9457SAndroid Build Coastguard Worker xnn_parallelization_type_4d_tile_2d_with_uarch, 36*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_MAX_UARCH_TYPES > 1 37*4bdc9457SAndroid Build Coastguard Worker }; 38*4bdc9457SAndroid Build Coastguard Worker 39*4bdc9457SAndroid Build Coastguard Worker struct compute_parameters { 40*4bdc9457SAndroid Build Coastguard Worker enum xnn_parallelization_type type; 41*4bdc9457SAndroid Build Coastguard Worker union { 42*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_1d_t task_1d; 43*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_1d_tile_1d_t task_1d_tile_1d; 44*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_2d_t task_2d; 45*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_2d_tile_1d_t task_2d_tile_1d; 46*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_2d_tile_2d_t task_2d_tile_2d; 47*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_3d_t task_3d; 48*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_3d_tile_2d_t task_3d_tile_2d; 49*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_4d_t task_4d; 50*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_4d_tile_2d_t task_4d_tile_2d; 51*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_5d_t task_5d; 52*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_5d_tile_2d_t task_5d_tile_2d; 53*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_6d_tile_2d_t task_6d_tile_2d; 54*4bdc9457SAndroid Build Coastguard Worker #if XNN_MAX_UARCH_TYPES > 1 55*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_2d_tile_2d_with_id_t task_2d_tile_2d_with_id; 56*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_3d_tile_2d_with_id_t task_3d_tile_2d_with_id; 57*4bdc9457SAndroid Build Coastguard Worker pthreadpool_task_4d_tile_2d_with_id_t task_4d_tile_2d_with_id; 58*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_MAX_UARCH_TYPES > 1 59*4bdc9457SAndroid Build Coastguard Worker }; 60*4bdc9457SAndroid Build Coastguard Worker size_t range[6]; 61*4bdc9457SAndroid Build Coastguard Worker size_t tile[2]; 62*4bdc9457SAndroid Build Coastguard Worker }; 63*4bdc9457SAndroid Build Coastguard Worker 64*4bdc9457SAndroid Build Coastguard Worker struct transpose_context { 65*4bdc9457SAndroid Build Coastguard Worker const void* x; 66*4bdc9457SAndroid Build Coastguard Worker void* y; 67*4bdc9457SAndroid Build Coastguard Worker union { 68*4bdc9457SAndroid Build Coastguard Worker xnn_transposec_ukernel_function const_size_ukernel; 69*4bdc9457SAndroid Build Coastguard Worker xnn_transposev_ukernel_function variable_size_ukernel; 70*4bdc9457SAndroid Build Coastguard Worker }; 71*4bdc9457SAndroid Build Coastguard Worker union { 72*4bdc9457SAndroid Build Coastguard Worker size_t element_size; 73*4bdc9457SAndroid Build Coastguard Worker size_t log2_element_size; 74*4bdc9457SAndroid Build Coastguard Worker }; 75*4bdc9457SAndroid Build Coastguard Worker size_t input_stride[XNN_MAX_TENSOR_DIMS]; 76*4bdc9457SAndroid Build Coastguard Worker size_t output_stride[XNN_MAX_TENSOR_DIMS]; 77*4bdc9457SAndroid Build Coastguard Worker }; 78*4bdc9457SAndroid Build Coastguard Worker 79*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_2d( 80*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 81*4bdc9457SAndroid Build Coastguard Worker size_t i, 82*4bdc9457SAndroid Build Coastguard Worker size_t j, 83*4bdc9457SAndroid Build Coastguard Worker size_t tile_i, 84*4bdc9457SAndroid Build Coastguard Worker size_t tile_j); 85*4bdc9457SAndroid Build Coastguard Worker 86*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_3d( 87*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 88*4bdc9457SAndroid Build Coastguard Worker size_t i, 89*4bdc9457SAndroid Build Coastguard Worker size_t j, 90*4bdc9457SAndroid Build Coastguard Worker size_t k, 91*4bdc9457SAndroid Build Coastguard Worker size_t tile_j, 92*4bdc9457SAndroid Build Coastguard Worker size_t tile_k); 93*4bdc9457SAndroid Build Coastguard Worker 94*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_4d( 95*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 96*4bdc9457SAndroid Build Coastguard Worker size_t i, 97*4bdc9457SAndroid Build Coastguard Worker size_t j, 98*4bdc9457SAndroid Build Coastguard Worker size_t k, 99*4bdc9457SAndroid Build Coastguard Worker size_t l, 100*4bdc9457SAndroid Build Coastguard Worker size_t tile_k, 101*4bdc9457SAndroid Build Coastguard Worker size_t tile_l); 102*4bdc9457SAndroid Build Coastguard Worker 103*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_5d( 104*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 105*4bdc9457SAndroid Build Coastguard Worker size_t i, 106*4bdc9457SAndroid Build Coastguard Worker size_t j, 107*4bdc9457SAndroid Build Coastguard Worker size_t k, 108*4bdc9457SAndroid Build Coastguard Worker size_t l, 109*4bdc9457SAndroid Build Coastguard Worker size_t m, 110*4bdc9457SAndroid Build Coastguard Worker size_t tile_l, 111*4bdc9457SAndroid Build Coastguard Worker size_t tile_m); 112*4bdc9457SAndroid Build Coastguard Worker 113*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_6d( 114*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 115*4bdc9457SAndroid Build Coastguard Worker size_t i, 116*4bdc9457SAndroid Build Coastguard Worker size_t j, 117*4bdc9457SAndroid Build Coastguard Worker size_t k, 118*4bdc9457SAndroid Build Coastguard Worker size_t l, 119*4bdc9457SAndroid Build Coastguard Worker size_t m, 120*4bdc9457SAndroid Build Coastguard Worker size_t n, 121*4bdc9457SAndroid Build Coastguard Worker size_t tile_m, 122*4bdc9457SAndroid Build Coastguard Worker size_t tile_n); 123*4bdc9457SAndroid Build Coastguard Worker 124*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_2d( 125*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 126*4bdc9457SAndroid Build Coastguard Worker size_t i, 127*4bdc9457SAndroid Build Coastguard Worker size_t j, 128*4bdc9457SAndroid Build Coastguard Worker size_t tile_i, 129*4bdc9457SAndroid Build Coastguard Worker size_t tile_j); 130*4bdc9457SAndroid Build Coastguard Worker 131*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_3d( 132*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 133*4bdc9457SAndroid Build Coastguard Worker size_t i, 134*4bdc9457SAndroid Build Coastguard Worker size_t j, 135*4bdc9457SAndroid Build Coastguard Worker size_t k, 136*4bdc9457SAndroid Build Coastguard Worker size_t tile_j, 137*4bdc9457SAndroid Build Coastguard Worker size_t tile_k); 138*4bdc9457SAndroid Build Coastguard Worker 139*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_4d( 140*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 141*4bdc9457SAndroid Build Coastguard Worker size_t i, 142*4bdc9457SAndroid Build Coastguard Worker size_t j, 143*4bdc9457SAndroid Build Coastguard Worker size_t k, 144*4bdc9457SAndroid Build Coastguard Worker size_t l, 145*4bdc9457SAndroid Build Coastguard Worker size_t tile_k, 146*4bdc9457SAndroid Build Coastguard Worker size_t tile_l); 147*4bdc9457SAndroid Build Coastguard Worker 148*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_5d( 149*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 150*4bdc9457SAndroid Build Coastguard Worker size_t i, 151*4bdc9457SAndroid Build Coastguard Worker size_t j, 152*4bdc9457SAndroid Build Coastguard Worker size_t k, 153*4bdc9457SAndroid Build Coastguard Worker size_t l, 154*4bdc9457SAndroid Build Coastguard Worker size_t m, 155*4bdc9457SAndroid Build Coastguard Worker size_t tile_l, 156*4bdc9457SAndroid Build Coastguard Worker size_t tile_m); 157*4bdc9457SAndroid Build Coastguard Worker 158*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_6d( 159*4bdc9457SAndroid Build Coastguard Worker const struct transpose_context* context, 160*4bdc9457SAndroid Build Coastguard Worker size_t i, 161*4bdc9457SAndroid Build Coastguard Worker size_t j, 162*4bdc9457SAndroid Build Coastguard Worker size_t k, 163*4bdc9457SAndroid Build Coastguard Worker size_t l, 164*4bdc9457SAndroid Build Coastguard Worker size_t m, 165*4bdc9457SAndroid Build Coastguard Worker size_t n, 166*4bdc9457SAndroid Build Coastguard Worker size_t tile_m, 167*4bdc9457SAndroid Build Coastguard Worker size_t tile_n); 168*4bdc9457SAndroid Build Coastguard Worker 169*4bdc9457SAndroid Build Coastguard Worker struct gemm_context { 170*4bdc9457SAndroid Build Coastguard Worker size_t k_scaled; 171*4bdc9457SAndroid Build Coastguard Worker const void* a; 172*4bdc9457SAndroid Build Coastguard Worker size_t a_stride; 173*4bdc9457SAndroid Build Coastguard Worker const void* packed_w; 174*4bdc9457SAndroid Build Coastguard Worker size_t w_stride; 175*4bdc9457SAndroid Build Coastguard Worker size_t wg_stride; 176*4bdc9457SAndroid Build Coastguard Worker void* c; 177*4bdc9457SAndroid Build Coastguard Worker size_t cm_stride; 178*4bdc9457SAndroid Build Coastguard Worker size_t cn_stride; 179*4bdc9457SAndroid Build Coastguard Worker size_t cg_stride; 180*4bdc9457SAndroid Build Coastguard Worker uint32_t log2_csize; 181*4bdc9457SAndroid Build Coastguard Worker struct xnn_hmp_gemm_ukernel ukernel; 182*4bdc9457SAndroid Build Coastguard Worker void* fused_params; 183*4bdc9457SAndroid Build Coastguard Worker union { 184*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_conv_minmax_params qs8; 185*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_conv_minmax_params qu8; 186*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 187*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 188*4bdc9457SAndroid Build Coastguard Worker } params; 189*4bdc9457SAndroid Build Coastguard Worker }; 190*4bdc9457SAndroid Build Coastguard Worker 191*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 192*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_grouped_gemm( 193*4bdc9457SAndroid Build Coastguard Worker const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], 194*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 195*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 196*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 197*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 198*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 199*4bdc9457SAndroid Build Coastguard Worker 200*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_gemm( 201*4bdc9457SAndroid Build Coastguard Worker const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], 202*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 203*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 204*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 205*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 206*4bdc9457SAndroid Build Coastguard Worker 207*4bdc9457SAndroid Build Coastguard Worker #if XNN_MAX_UARCH_TYPES > 1 208*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_hmp_grouped_gemm( 209*4bdc9457SAndroid Build Coastguard Worker const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], 210*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 211*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 212*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 213*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 214*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 215*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 216*4bdc9457SAndroid Build Coastguard Worker 217*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_hmp_gemm( 218*4bdc9457SAndroid Build Coastguard Worker const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)], 219*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 220*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 221*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 222*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 223*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 224*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_MAX_UARCH_TYPES > 1 225*4bdc9457SAndroid Build Coastguard Worker #endif 226*4bdc9457SAndroid Build Coastguard Worker 227*4bdc9457SAndroid Build Coastguard Worker // Context for Sparse Matrix-Dense Matrix Multiplication. 228*4bdc9457SAndroid Build Coastguard Worker // C [MxN] := A [MxK] * B [KxN] + bias [N] 229*4bdc9457SAndroid Build Coastguard Worker // A and C are dense matrices with row-major storage, B is a sparse matrix. 230*4bdc9457SAndroid Build Coastguard Worker struct spmm_context { 231*4bdc9457SAndroid Build Coastguard Worker // N dimension of the B and C matrices. 232*4bdc9457SAndroid Build Coastguard Worker // Corresponds to number of output channels in 1x1 convolution. 233*4bdc9457SAndroid Build Coastguard Worker size_t n; 234*4bdc9457SAndroid Build Coastguard Worker // M dimension of the A and C matrices, pre-scaled by sizeof(element size). 235*4bdc9457SAndroid Build Coastguard Worker // Corresponds to the stride, in bytes, between adjacent rows of C matrix. 236*4bdc9457SAndroid Build Coastguard Worker size_t scaled_m; 237*4bdc9457SAndroid Build Coastguard Worker // Input matrix A. 238*4bdc9457SAndroid Build Coastguard Worker const void* input; 239*4bdc9457SAndroid Build Coastguard Worker // Packed bias elements and non-zero filter elements. 240*4bdc9457SAndroid Build Coastguard Worker const void* nonzero_weights; 241*4bdc9457SAndroid Build Coastguard Worker // Input pointer increments, in bytes, after each processed non-zero weight. 242*4bdc9457SAndroid Build Coastguard Worker const int32_t* input_increments; 243*4bdc9457SAndroid Build Coastguard Worker // Number of non-zero filter elements per each N (output channel) dimension. 244*4bdc9457SAndroid Build Coastguard Worker const uint32_t* output_channel_nonzeros; 245*4bdc9457SAndroid Build Coastguard Worker // Output matrix C. 246*4bdc9457SAndroid Build Coastguard Worker void* output; 247*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between matrices A corresponding to different images in batched 1x1 Convolution 248*4bdc9457SAndroid Build Coastguard Worker size_t batched_input_stride; 249*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between matrices C corresponding to different images in batched 1x1 Convolution 250*4bdc9457SAndroid Build Coastguard Worker size_t batched_output_stride; 251*4bdc9457SAndroid Build Coastguard Worker // Micro-kernel function pointer. 252*4bdc9457SAndroid Build Coastguard Worker xnn_spmm_ukernel_function ukernel; 253*4bdc9457SAndroid Build Coastguard Worker // Output activation parameters. 254*4bdc9457SAndroid Build Coastguard Worker union { 255*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 256*4bdc9457SAndroid Build Coastguard Worker } params; 257*4bdc9457SAndroid Build Coastguard Worker }; 258*4bdc9457SAndroid Build Coastguard Worker 259*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 260*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_spmm( 261*4bdc9457SAndroid Build Coastguard Worker const struct spmm_context context[restrict XNN_MIN_ELEMENTS(1)], 262*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 263*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 264*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size); 265*4bdc9457SAndroid Build Coastguard Worker #endif 266*4bdc9457SAndroid Build Coastguard Worker 267*4bdc9457SAndroid Build Coastguard Worker struct igemm_context { 268*4bdc9457SAndroid Build Coastguard Worker size_t ks; 269*4bdc9457SAndroid Build Coastguard Worker size_t ks_scaled; 270*4bdc9457SAndroid Build Coastguard Worker size_t kc; 271*4bdc9457SAndroid Build Coastguard Worker size_t w_stride; 272*4bdc9457SAndroid Build Coastguard Worker const void** indirect_a; 273*4bdc9457SAndroid Build Coastguard Worker size_t a_offset; 274*4bdc9457SAndroid Build Coastguard Worker void* zero; 275*4bdc9457SAndroid Build Coastguard Worker const void* packed_w; 276*4bdc9457SAndroid Build Coastguard Worker void* c; 277*4bdc9457SAndroid Build Coastguard Worker size_t cm_stride; 278*4bdc9457SAndroid Build Coastguard Worker size_t cn_stride; 279*4bdc9457SAndroid Build Coastguard Worker size_t ga_stride; 280*4bdc9457SAndroid Build Coastguard Worker size_t gw_stride; 281*4bdc9457SAndroid Build Coastguard Worker size_t gc_stride; 282*4bdc9457SAndroid Build Coastguard Worker size_t ba_stride; 283*4bdc9457SAndroid Build Coastguard Worker size_t bc_stride; 284*4bdc9457SAndroid Build Coastguard Worker uint32_t log2_csize; 285*4bdc9457SAndroid Build Coastguard Worker struct xnn_hmp_igemm_ukernel ukernel; 286*4bdc9457SAndroid Build Coastguard Worker union { 287*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_conv_minmax_params qs8; 288*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_conv_minmax_params qu8; 289*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 290*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 291*4bdc9457SAndroid Build Coastguard Worker } params; 292*4bdc9457SAndroid Build Coastguard Worker }; 293*4bdc9457SAndroid Build Coastguard Worker 294*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 295*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_grouped_igemm( 296*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 297*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 298*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 299*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 300*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 301*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 302*4bdc9457SAndroid Build Coastguard Worker 303*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_grouped_batch_igemm( 304*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 305*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 306*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 307*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 308*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 309*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 310*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 311*4bdc9457SAndroid Build Coastguard Worker 312*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_igemm( 313*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 314*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 315*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 316*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 317*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 318*4bdc9457SAndroid Build Coastguard Worker 319*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_batch_igemm( 320*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 321*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 322*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 323*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 324*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 325*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 326*4bdc9457SAndroid Build Coastguard Worker 327*4bdc9457SAndroid Build Coastguard Worker #if XNN_MAX_UARCH_TYPES > 1 328*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_hmp_grouped_igemm( 329*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 330*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 331*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 332*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 333*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 334*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 335*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 336*4bdc9457SAndroid Build Coastguard Worker 337*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_hmp_grouped_batch_igemm( 338*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 339*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 340*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 341*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 342*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 343*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 344*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 345*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 346*4bdc9457SAndroid Build Coastguard Worker 347*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_hmp_igemm( 348*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 349*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 350*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 351*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 352*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 353*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 354*4bdc9457SAndroid Build Coastguard Worker 355*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_batch_hmp_igemm( 356*4bdc9457SAndroid Build Coastguard Worker const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)], 357*4bdc9457SAndroid Build Coastguard Worker uint32_t uarch_index, 358*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 359*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_start, 360*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 361*4bdc9457SAndroid Build Coastguard Worker size_t mr_block_size, 362*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 363*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_MAX_UARCH_TYPES > 1 364*4bdc9457SAndroid Build Coastguard Worker #endif 365*4bdc9457SAndroid Build Coastguard Worker 366*4bdc9457SAndroid Build Coastguard Worker struct subgemm_context { 367*4bdc9457SAndroid Build Coastguard Worker const struct subconvolution_params* subconvolution_params; 368*4bdc9457SAndroid Build Coastguard Worker size_t kc; 369*4bdc9457SAndroid Build Coastguard Worker const void* a; 370*4bdc9457SAndroid Build Coastguard Worker size_t ax_stride; 371*4bdc9457SAndroid Build Coastguard Worker size_t ay_stride; 372*4bdc9457SAndroid Build Coastguard Worker size_t cx_stride; 373*4bdc9457SAndroid Build Coastguard Worker size_t cy_stride; 374*4bdc9457SAndroid Build Coastguard Worker size_t cn_stride; 375*4bdc9457SAndroid Build Coastguard Worker size_t ga_stride; 376*4bdc9457SAndroid Build Coastguard Worker size_t gw_stride; 377*4bdc9457SAndroid Build Coastguard Worker size_t gc_stride; 378*4bdc9457SAndroid Build Coastguard Worker size_t ba_stride; 379*4bdc9457SAndroid Build Coastguard Worker size_t bc_stride; 380*4bdc9457SAndroid Build Coastguard Worker uint32_t log2_csize; 381*4bdc9457SAndroid Build Coastguard Worker struct xnn_hmp_gemm_ukernel ukernel; 382*4bdc9457SAndroid Build Coastguard Worker union { 383*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_conv_minmax_params qs8; 384*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_conv_minmax_params qu8; 385*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 386*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 387*4bdc9457SAndroid Build Coastguard Worker } params; 388*4bdc9457SAndroid Build Coastguard Worker }; 389*4bdc9457SAndroid Build Coastguard Worker 390*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 391*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_grouped_subgemm2d( 392*4bdc9457SAndroid Build Coastguard Worker const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)], 393*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 394*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 395*4bdc9457SAndroid Build Coastguard Worker size_t subkernel_index, 396*4bdc9457SAndroid Build Coastguard Worker size_t slice_y, 397*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_start, 398*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 399*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_max, 400*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 401*4bdc9457SAndroid Build Coastguard Worker 402*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_subgemm2d( 403*4bdc9457SAndroid Build Coastguard Worker const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)], 404*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 405*4bdc9457SAndroid Build Coastguard Worker size_t subkernel_index, 406*4bdc9457SAndroid Build Coastguard Worker size_t slice_y, 407*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_start, 408*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 409*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_max, 410*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 411*4bdc9457SAndroid Build Coastguard Worker #endif 412*4bdc9457SAndroid Build Coastguard Worker 413*4bdc9457SAndroid Build Coastguard Worker struct subconv_context { 414*4bdc9457SAndroid Build Coastguard Worker const struct subconvolution_params* subconvolution_params; 415*4bdc9457SAndroid Build Coastguard Worker size_t kc; 416*4bdc9457SAndroid Build Coastguard Worker size_t a_offset; 417*4bdc9457SAndroid Build Coastguard Worker void* zero; 418*4bdc9457SAndroid Build Coastguard Worker size_t cx_stride; 419*4bdc9457SAndroid Build Coastguard Worker size_t cy_stride; 420*4bdc9457SAndroid Build Coastguard Worker size_t cn_stride; 421*4bdc9457SAndroid Build Coastguard Worker size_t ga_stride; 422*4bdc9457SAndroid Build Coastguard Worker size_t gw_stride; 423*4bdc9457SAndroid Build Coastguard Worker size_t gc_stride; 424*4bdc9457SAndroid Build Coastguard Worker size_t ba_stride; 425*4bdc9457SAndroid Build Coastguard Worker size_t bc_stride; 426*4bdc9457SAndroid Build Coastguard Worker uint32_t log2_csize; 427*4bdc9457SAndroid Build Coastguard Worker struct xnn_hmp_igemm_ukernel ukernel; 428*4bdc9457SAndroid Build Coastguard Worker union { 429*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_conv_minmax_params qs8; 430*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_conv_minmax_params qu8; 431*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 432*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 433*4bdc9457SAndroid Build Coastguard Worker } params; 434*4bdc9457SAndroid Build Coastguard Worker }; 435*4bdc9457SAndroid Build Coastguard Worker 436*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 437*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_grouped_subconv2d( 438*4bdc9457SAndroid Build Coastguard Worker const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)], 439*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 440*4bdc9457SAndroid Build Coastguard Worker size_t group_index, 441*4bdc9457SAndroid Build Coastguard Worker size_t subkernel_index, 442*4bdc9457SAndroid Build Coastguard Worker size_t slice_y, 443*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_start, 444*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 445*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_max, 446*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 447*4bdc9457SAndroid Build Coastguard Worker 448*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_subconv2d( 449*4bdc9457SAndroid Build Coastguard Worker const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)], 450*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 451*4bdc9457SAndroid Build Coastguard Worker size_t subkernel_index, 452*4bdc9457SAndroid Build Coastguard Worker size_t slice_y, 453*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_start, 454*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_start, 455*4bdc9457SAndroid Build Coastguard Worker size_t slice_x_max, 456*4bdc9457SAndroid Build Coastguard Worker size_t nr_block_size); 457*4bdc9457SAndroid Build Coastguard Worker #endif 458*4bdc9457SAndroid Build Coastguard Worker 459*4bdc9457SAndroid Build Coastguard Worker struct conv2d_context { 460*4bdc9457SAndroid Build Coastguard Worker size_t input_height; 461*4bdc9457SAndroid Build Coastguard Worker size_t input_width; 462*4bdc9457SAndroid Build Coastguard Worker const void* input; 463*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 464*4bdc9457SAndroid Build Coastguard Worker const void* zero; 465*4bdc9457SAndroid Build Coastguard Worker const void* packed_weights; 466*4bdc9457SAndroid Build Coastguard Worker void* output; 467*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 468*4bdc9457SAndroid Build Coastguard Worker size_t input_padding_top; 469*4bdc9457SAndroid Build Coastguard Worker size_t output_channels; 470*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 471*4bdc9457SAndroid Build Coastguard Worker size_t output_channel_stride; 472*4bdc9457SAndroid Build Coastguard Worker union { 473*4bdc9457SAndroid Build Coastguard Worker xnn_conv_hwc2chw_ukernel_function hwc2chw_ukernel; 474*4bdc9457SAndroid Build Coastguard Worker }; 475*4bdc9457SAndroid Build Coastguard Worker union { 476*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 477*4bdc9457SAndroid Build Coastguard Worker } params; 478*4bdc9457SAndroid Build Coastguard Worker }; 479*4bdc9457SAndroid Build Coastguard Worker 480*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 481*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_conv2d_hwc2chw( 482*4bdc9457SAndroid Build Coastguard Worker const struct conv2d_context context[restrict XNN_MIN_ELEMENTS(1)], 483*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 484*4bdc9457SAndroid Build Coastguard Worker size_t output_y_start, 485*4bdc9457SAndroid Build Coastguard Worker size_t output_y_slice); 486*4bdc9457SAndroid Build Coastguard Worker #endif 487*4bdc9457SAndroid Build Coastguard Worker 488*4bdc9457SAndroid Build Coastguard Worker struct dwconv_context { 489*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 490*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_width_stride; 491*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_height_stride; 492*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 493*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 494*4bdc9457SAndroid Build Coastguard Worker const void* packed_weights; 495*4bdc9457SAndroid Build Coastguard Worker void* output; 496*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 497*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 498*4bdc9457SAndroid Build Coastguard Worker size_t output_width; 499*4bdc9457SAndroid Build Coastguard Worker size_t groups; 500*4bdc9457SAndroid Build Coastguard Worker const void* zero; 501*4bdc9457SAndroid Build Coastguard Worker size_t output_increment; 502*4bdc9457SAndroid Build Coastguard Worker union { 503*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_conv_minmax_params qs8; 504*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_conv_minmax_params qu8; 505*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16; 506*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 507*4bdc9457SAndroid Build Coastguard Worker } params; 508*4bdc9457SAndroid Build Coastguard Worker union { 509*4bdc9457SAndroid Build Coastguard Worker xnn_dwconv_unipass_ukernel_function unipass_ukernel; 510*4bdc9457SAndroid Build Coastguard Worker }; 511*4bdc9457SAndroid Build Coastguard Worker }; 512*4bdc9457SAndroid Build Coastguard Worker 513*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 514*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_dwconv_unipass( 515*4bdc9457SAndroid Build Coastguard Worker const struct dwconv_context context[restrict XNN_MIN_ELEMENTS(1)], 516*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 517*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 518*4bdc9457SAndroid Build Coastguard Worker #endif 519*4bdc9457SAndroid Build Coastguard Worker 520*4bdc9457SAndroid Build Coastguard Worker struct dwconv2d_context { 521*4bdc9457SAndroid Build Coastguard Worker size_t input_height; 522*4bdc9457SAndroid Build Coastguard Worker size_t input_width; 523*4bdc9457SAndroid Build Coastguard Worker const void* input; 524*4bdc9457SAndroid Build Coastguard Worker const void* zero; 525*4bdc9457SAndroid Build Coastguard Worker uint32_t input_padding_top; 526*4bdc9457SAndroid Build Coastguard Worker size_t input_channel_stride; 527*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 528*4bdc9457SAndroid Build Coastguard Worker const void* packed_weights; 529*4bdc9457SAndroid Build Coastguard Worker size_t weights_channel_stride; 530*4bdc9457SAndroid Build Coastguard Worker void* output; 531*4bdc9457SAndroid Build Coastguard Worker size_t output_channel_stride; 532*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 533*4bdc9457SAndroid Build Coastguard Worker union { 534*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_chw_params f32; 535*4bdc9457SAndroid Build Coastguard Worker } params; 536*4bdc9457SAndroid Build Coastguard Worker union { 537*4bdc9457SAndroid Build Coastguard Worker xnn_dwconv2d_chw_ukernel_function chw_ukernel; 538*4bdc9457SAndroid Build Coastguard Worker }; 539*4bdc9457SAndroid Build Coastguard Worker }; 540*4bdc9457SAndroid Build Coastguard Worker 541*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 542*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_dwconv2d_chw( 543*4bdc9457SAndroid Build Coastguard Worker const struct dwconv2d_context context[restrict XNN_MIN_ELEMENTS(1)], 544*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 545*4bdc9457SAndroid Build Coastguard Worker size_t channel); 546*4bdc9457SAndroid Build Coastguard Worker #endif 547*4bdc9457SAndroid Build Coastguard Worker 548*4bdc9457SAndroid Build Coastguard Worker struct max_pooling_context { 549*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 550*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_height_stride; 551*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 552*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 553*4bdc9457SAndroid Build Coastguard Worker void* output; 554*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 555*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 556*4bdc9457SAndroid Build Coastguard Worker size_t output_width; 557*4bdc9457SAndroid Build Coastguard Worker size_t pooling_size; 558*4bdc9457SAndroid Build Coastguard Worker size_t channels; 559*4bdc9457SAndroid Build Coastguard Worker size_t input_increment; 560*4bdc9457SAndroid Build Coastguard Worker size_t output_increment; 561*4bdc9457SAndroid Build Coastguard Worker union { 562*4bdc9457SAndroid Build Coastguard Worker union xnn_u8_minmax_params u8; 563*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 564*4bdc9457SAndroid Build Coastguard Worker } params; 565*4bdc9457SAndroid Build Coastguard Worker xnn_maxpool_ukernel_function ukernel; 566*4bdc9457SAndroid Build Coastguard Worker }; 567*4bdc9457SAndroid Build Coastguard Worker 568*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 569*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_max_pooling( 570*4bdc9457SAndroid Build Coastguard Worker const struct max_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 571*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 572*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 573*4bdc9457SAndroid Build Coastguard Worker #endif 574*4bdc9457SAndroid Build Coastguard Worker 575*4bdc9457SAndroid Build Coastguard Worker struct unpooling_context { 576*4bdc9457SAndroid Build Coastguard Worker const void* input; 577*4bdc9457SAndroid Build Coastguard Worker size_t input_height_stride; 578*4bdc9457SAndroid Build Coastguard Worker size_t input_width_stride; 579*4bdc9457SAndroid Build Coastguard Worker const uint32_t* index; 580*4bdc9457SAndroid Build Coastguard Worker size_t index_height_stride; 581*4bdc9457SAndroid Build Coastguard Worker size_t index_width_stride; 582*4bdc9457SAndroid Build Coastguard Worker const void** indirect_output; 583*4bdc9457SAndroid Build Coastguard Worker size_t indirect_output_height_stride; 584*4bdc9457SAndroid Build Coastguard Worker size_t indirect_output_width_stride; 585*4bdc9457SAndroid Build Coastguard Worker size_t pooling_size; 586*4bdc9457SAndroid Build Coastguard Worker size_t channels; 587*4bdc9457SAndroid Build Coastguard Worker uint32_t fill_value; 588*4bdc9457SAndroid Build Coastguard Worker xnn_unpool_ukernel_function ukernel; 589*4bdc9457SAndroid Build Coastguard Worker }; 590*4bdc9457SAndroid Build Coastguard Worker 591*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 592*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_unpooling( 593*4bdc9457SAndroid Build Coastguard Worker const struct unpooling_context context[restrict XNN_MIN_ELEMENTS(1)], 594*4bdc9457SAndroid Build Coastguard Worker size_t input_y, 595*4bdc9457SAndroid Build Coastguard Worker size_t input_x); 596*4bdc9457SAndroid Build Coastguard Worker #endif 597*4bdc9457SAndroid Build Coastguard Worker 598*4bdc9457SAndroid Build Coastguard Worker struct argmax_pooling_context { 599*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 600*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_height_stride; 601*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 602*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 603*4bdc9457SAndroid Build Coastguard Worker void* output; 604*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 605*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 606*4bdc9457SAndroid Build Coastguard Worker size_t output_width; 607*4bdc9457SAndroid Build Coastguard Worker uint32_t* index; 608*4bdc9457SAndroid Build Coastguard Worker size_t index_batch_stride; 609*4bdc9457SAndroid Build Coastguard Worker size_t index_height_stride; 610*4bdc9457SAndroid Build Coastguard Worker size_t pooling_size; 611*4bdc9457SAndroid Build Coastguard Worker size_t channels; 612*4bdc9457SAndroid Build Coastguard Worker size_t input_increment; 613*4bdc9457SAndroid Build Coastguard Worker size_t output_increment; 614*4bdc9457SAndroid Build Coastguard Worker union { 615*4bdc9457SAndroid Build Coastguard Worker xnn_argmaxpool_unipass_ukernel_function unipass_ukernel; 616*4bdc9457SAndroid Build Coastguard Worker xnn_argmaxpool_multipass_ukernel_function multipass_ukernel; 617*4bdc9457SAndroid Build Coastguard Worker }; 618*4bdc9457SAndroid Build Coastguard Worker }; 619*4bdc9457SAndroid Build Coastguard Worker 620*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 621*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_argmax_pooling_unipass( 622*4bdc9457SAndroid Build Coastguard Worker const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 623*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 624*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 625*4bdc9457SAndroid Build Coastguard Worker 626*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_argmax_pooling_multipass( 627*4bdc9457SAndroid Build Coastguard Worker const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 628*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 629*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 630*4bdc9457SAndroid Build Coastguard Worker #endif 631*4bdc9457SAndroid Build Coastguard Worker 632*4bdc9457SAndroid Build Coastguard Worker struct average_pooling_context { 633*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 634*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_height_stride; 635*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 636*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 637*4bdc9457SAndroid Build Coastguard Worker void* output; 638*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 639*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 640*4bdc9457SAndroid Build Coastguard Worker size_t output_width; 641*4bdc9457SAndroid Build Coastguard Worker size_t pooling_size; 642*4bdc9457SAndroid Build Coastguard Worker size_t channels; 643*4bdc9457SAndroid Build Coastguard Worker const void* zero; 644*4bdc9457SAndroid Build Coastguard Worker size_t input_increment; 645*4bdc9457SAndroid Build Coastguard Worker size_t output_increment; 646*4bdc9457SAndroid Build Coastguard Worker union { 647*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 648*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_scaleminmax_params f32; 649*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_avgpool_minmax_params qu8; 650*4bdc9457SAndroid Build Coastguard Worker } params; 651*4bdc9457SAndroid Build Coastguard Worker union { 652*4bdc9457SAndroid Build Coastguard Worker xnn_avgpool_unipass_ukernel_function unipass_ukernel; 653*4bdc9457SAndroid Build Coastguard Worker xnn_avgpool_multipass_ukernel_function multipass_ukernel; 654*4bdc9457SAndroid Build Coastguard Worker }; 655*4bdc9457SAndroid Build Coastguard Worker }; 656*4bdc9457SAndroid Build Coastguard Worker 657*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 658*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_average_pooling_unipass( 659*4bdc9457SAndroid Build Coastguard Worker const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 660*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 661*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 662*4bdc9457SAndroid Build Coastguard Worker 663*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_average_pooling_multipass( 664*4bdc9457SAndroid Build Coastguard Worker const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 665*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 666*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 667*4bdc9457SAndroid Build Coastguard Worker #endif 668*4bdc9457SAndroid Build Coastguard Worker 669*4bdc9457SAndroid Build Coastguard Worker struct pixelwise_average_pooling_context { 670*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 671*4bdc9457SAndroid Build Coastguard Worker size_t indirect_input_height_stride; 672*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 673*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 674*4bdc9457SAndroid Build Coastguard Worker const void* pixelwise_buffer; 675*4bdc9457SAndroid Build Coastguard Worker size_t pixelwise_buffer_height_stride; 676*4bdc9457SAndroid Build Coastguard Worker void* output; 677*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 678*4bdc9457SAndroid Build Coastguard Worker size_t output_height_stride; 679*4bdc9457SAndroid Build Coastguard Worker size_t output_width; 680*4bdc9457SAndroid Build Coastguard Worker size_t pooling_size; 681*4bdc9457SAndroid Build Coastguard Worker size_t channels; 682*4bdc9457SAndroid Build Coastguard Worker const void* zero; 683*4bdc9457SAndroid Build Coastguard Worker size_t input_increment; 684*4bdc9457SAndroid Build Coastguard Worker size_t output_increment; 685*4bdc9457SAndroid Build Coastguard Worker union { 686*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16; 687*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 688*4bdc9457SAndroid Build Coastguard Worker union xnn_u8_minmax_params u8; 689*4bdc9457SAndroid Build Coastguard Worker } params; 690*4bdc9457SAndroid Build Coastguard Worker union { 691*4bdc9457SAndroid Build Coastguard Worker xnn_pavgpool_unipass_ukernel_function unipass_ukernel; 692*4bdc9457SAndroid Build Coastguard Worker xnn_pavgpool_multipass_ukernel_function multipass_ukernel; 693*4bdc9457SAndroid Build Coastguard Worker }; 694*4bdc9457SAndroid Build Coastguard Worker }; 695*4bdc9457SAndroid Build Coastguard Worker 696*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 697*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_pixelwise_average_pooling_unipass( 698*4bdc9457SAndroid Build Coastguard Worker const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 699*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 700*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 701*4bdc9457SAndroid Build Coastguard Worker 702*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_pixelwise_average_pooling_multipass( 703*4bdc9457SAndroid Build Coastguard Worker const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)], 704*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 705*4bdc9457SAndroid Build Coastguard Worker size_t output_y); 706*4bdc9457SAndroid Build Coastguard Worker #endif 707*4bdc9457SAndroid Build Coastguard Worker 708*4bdc9457SAndroid Build Coastguard Worker struct global_average_pooling_nwc_context { 709*4bdc9457SAndroid Build Coastguard Worker const void* input; 710*4bdc9457SAndroid Build Coastguard Worker const void* zero; 711*4bdc9457SAndroid Build Coastguard Worker size_t input_pixel_stride; 712*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 713*4bdc9457SAndroid Build Coastguard Worker size_t input_elements; 714*4bdc9457SAndroid Build Coastguard Worker size_t channels; 715*4bdc9457SAndroid Build Coastguard Worker void* output; 716*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 717*4bdc9457SAndroid Build Coastguard Worker union { 718*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_avgpool_minmax_params qs8; 719*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_avgpool_minmax_params qu8; 720*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_scaleminmax_params f16; 721*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_scaleminmax_params f32; 722*4bdc9457SAndroid Build Coastguard Worker } params; 723*4bdc9457SAndroid Build Coastguard Worker union { 724*4bdc9457SAndroid Build Coastguard Worker xnn_gavgpool_unipass_ukernel_function unipass_ukernel; 725*4bdc9457SAndroid Build Coastguard Worker xnn_gavgpool_multipass_ukernel_function multipass_ukernel; 726*4bdc9457SAndroid Build Coastguard Worker }; 727*4bdc9457SAndroid Build Coastguard Worker }; 728*4bdc9457SAndroid Build Coastguard Worker 729*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 730*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_unipass( 731*4bdc9457SAndroid Build Coastguard Worker const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)], 732*4bdc9457SAndroid Build Coastguard Worker size_t batch_index); 733*4bdc9457SAndroid Build Coastguard Worker 734*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_multipass( 735*4bdc9457SAndroid Build Coastguard Worker const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)], 736*4bdc9457SAndroid Build Coastguard Worker size_t batch_index); 737*4bdc9457SAndroid Build Coastguard Worker #endif 738*4bdc9457SAndroid Build Coastguard Worker 739*4bdc9457SAndroid Build Coastguard Worker struct global_average_pooling_ncw_context { 740*4bdc9457SAndroid Build Coastguard Worker size_t input_elements; 741*4bdc9457SAndroid Build Coastguard Worker const void* input; 742*4bdc9457SAndroid Build Coastguard Worker size_t input_channel_stride; 743*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 744*4bdc9457SAndroid Build Coastguard Worker void* output; 745*4bdc9457SAndroid Build Coastguard Worker size_t output_channel_stride; 746*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 747*4bdc9457SAndroid Build Coastguard Worker xnn_gavgpool_cw_ukernel_function ukernel; 748*4bdc9457SAndroid Build Coastguard Worker union { 749*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_gavgpool_params f32; 750*4bdc9457SAndroid Build Coastguard Worker } params; 751*4bdc9457SAndroid Build Coastguard Worker }; 752*4bdc9457SAndroid Build Coastguard Worker 753*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 754*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_global_average_pooling_ncw( 755*4bdc9457SAndroid Build Coastguard Worker const struct global_average_pooling_ncw_context context[restrict XNN_MIN_ELEMENTS(1)], 756*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 757*4bdc9457SAndroid Build Coastguard Worker size_t channels_start, 758*4bdc9457SAndroid Build Coastguard Worker size_t channels_slice); 759*4bdc9457SAndroid Build Coastguard Worker #endif 760*4bdc9457SAndroid Build Coastguard Worker 761*4bdc9457SAndroid Build Coastguard Worker struct resize_bilinear_context { 762*4bdc9457SAndroid Build Coastguard Worker // Number of channels multiplied by sizeof(input element). 763*4bdc9457SAndroid Build Coastguard Worker size_t scaled_channels; 764*4bdc9457SAndroid Build Coastguard Worker // Indirection buffer with pointers related to rows of input pixels. 765*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 766*4bdc9457SAndroid Build Coastguard Worker // Offset, in bytes, to be added to pointers in indirection buffer. 767*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 768*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between images of consecutive batches in the input. 769*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 770*4bdc9457SAndroid Build Coastguard Worker // Packed pairs of (x, y) linear interpolation coefficients. 771*4bdc9457SAndroid Build Coastguard Worker const void* packed_weights; 772*4bdc9457SAndroid Build Coastguard Worker // Pointer to the output tensor. 773*4bdc9457SAndroid Build Coastguard Worker void* output; 774*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between adjacent pixels in the output. 775*4bdc9457SAndroid Build Coastguard Worker size_t output_pixel_stride; 776*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between images of consecutive batches in the output. 777*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 778*4bdc9457SAndroid Build Coastguard Worker // log2(sizeof(weight element)). 779*4bdc9457SAndroid Build Coastguard Worker uint32_t log2_wsize; 780*4bdc9457SAndroid Build Coastguard Worker // Pointer to BILINEAR micro-kernel function. 781*4bdc9457SAndroid Build Coastguard Worker xnn_ibilinear_ukernel_function ukernel; 782*4bdc9457SAndroid Build Coastguard Worker }; 783*4bdc9457SAndroid Build Coastguard Worker 784*4bdc9457SAndroid Build Coastguard Worker struct resize_bilinear_chw_context { 785*4bdc9457SAndroid Build Coastguard Worker // Number of pixels per output image plane. 786*4bdc9457SAndroid Build Coastguard Worker size_t output_pixels; 787*4bdc9457SAndroid Build Coastguard Worker // Number of channels multiplied by sizeof(input element). 788*4bdc9457SAndroid Build Coastguard Worker size_t channels; 789*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between adjacent channels in the input. 790*4bdc9457SAndroid Build Coastguard Worker size_t input_channel_stride; 791*4bdc9457SAndroid Build Coastguard Worker // Indirection buffer with pointers related to rows of input pixels. 792*4bdc9457SAndroid Build Coastguard Worker const void** indirect_input; 793*4bdc9457SAndroid Build Coastguard Worker // Offset, in bytes, to be added to pointers in indirection buffer. 794*4bdc9457SAndroid Build Coastguard Worker size_t input_offset; 795*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between images of consecutive batches in the input. 796*4bdc9457SAndroid Build Coastguard Worker size_t input_batch_stride; 797*4bdc9457SAndroid Build Coastguard Worker // Packed pairs of (x, y) linear interpolation coefficients. 798*4bdc9457SAndroid Build Coastguard Worker const void* packed_weights; 799*4bdc9457SAndroid Build Coastguard Worker // Pointer to the output tensor. 800*4bdc9457SAndroid Build Coastguard Worker void* output; 801*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between images of consecutive batches in the output. 802*4bdc9457SAndroid Build Coastguard Worker size_t output_batch_stride; 803*4bdc9457SAndroid Build Coastguard Worker // Stride, in bytes, between consecutive channels of an output image. 804*4bdc9457SAndroid Build Coastguard Worker size_t output_channel_stride; 805*4bdc9457SAndroid Build Coastguard Worker // Pointer to BILINEAR micro-kernel function. 806*4bdc9457SAndroid Build Coastguard Worker xnn_ibilinear_chw_ukernel_function ukernel; 807*4bdc9457SAndroid Build Coastguard Worker }; 808*4bdc9457SAndroid Build Coastguard Worker 809*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 810*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_resize_bilinear( 811*4bdc9457SAndroid Build Coastguard Worker const struct resize_bilinear_context context[restrict XNN_MIN_ELEMENTS(1)], 812*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 813*4bdc9457SAndroid Build Coastguard Worker size_t pixel_start, 814*4bdc9457SAndroid Build Coastguard Worker size_t pixel_range); 815*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_resize_bilinear_chw( 816*4bdc9457SAndroid Build Coastguard Worker const struct resize_bilinear_chw_context context[restrict XNN_MIN_ELEMENTS(1)], 817*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 818*4bdc9457SAndroid Build Coastguard Worker size_t pixel_start, 819*4bdc9457SAndroid Build Coastguard Worker size_t pixel_range); 820*4bdc9457SAndroid Build Coastguard Worker #endif 821*4bdc9457SAndroid Build Coastguard Worker 822*4bdc9457SAndroid Build Coastguard Worker struct elementwise_binary_context { 823*4bdc9457SAndroid Build Coastguard Worker const void* a; 824*4bdc9457SAndroid Build Coastguard Worker size_t a_stride[XNN_MAX_TENSOR_DIMS - 1]; 825*4bdc9457SAndroid Build Coastguard Worker const void* b; 826*4bdc9457SAndroid Build Coastguard Worker size_t b_stride[XNN_MAX_TENSOR_DIMS - 1]; 827*4bdc9457SAndroid Build Coastguard Worker void* y; 828*4bdc9457SAndroid Build Coastguard Worker size_t y_stride[XNN_MAX_TENSOR_DIMS - 1]; 829*4bdc9457SAndroid Build Coastguard Worker size_t elements; 830*4bdc9457SAndroid Build Coastguard Worker union { 831*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_add_minmax_params qs8_addsub; 832*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_add_minmax_params qu8_addsub; 833*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_mul_minmax_params qs8_mul; 834*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_mul_minmax_params qu8_mul; 835*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16; 836*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 837*4bdc9457SAndroid Build Coastguard Worker } params; 838*4bdc9457SAndroid Build Coastguard Worker xnn_vbinary_ukernel_function ukernel; 839*4bdc9457SAndroid Build Coastguard Worker }; 840*4bdc9457SAndroid Build Coastguard Worker 841*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 842*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_elementwise_binary_1d( 843*4bdc9457SAndroid Build Coastguard Worker const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], 844*4bdc9457SAndroid Build Coastguard Worker size_t i); 845*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_elementwise_binary_2d( 846*4bdc9457SAndroid Build Coastguard Worker const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], 847*4bdc9457SAndroid Build Coastguard Worker size_t i, size_t j); 848*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_elementwise_binary_3d( 849*4bdc9457SAndroid Build Coastguard Worker const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], 850*4bdc9457SAndroid Build Coastguard Worker size_t i, size_t j, size_t k); 851*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_elementwise_binary_4d( 852*4bdc9457SAndroid Build Coastguard Worker const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], 853*4bdc9457SAndroid Build Coastguard Worker size_t i, size_t j, size_t k, size_t l); 854*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_elementwise_binary_5d( 855*4bdc9457SAndroid Build Coastguard Worker const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)], 856*4bdc9457SAndroid Build Coastguard Worker size_t i, size_t j, size_t k, size_t l, size_t m); 857*4bdc9457SAndroid Build Coastguard Worker #endif 858*4bdc9457SAndroid Build Coastguard Worker 859*4bdc9457SAndroid Build Coastguard Worker struct channel_shuffle_context { 860*4bdc9457SAndroid Build Coastguard Worker const void* x; 861*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 862*4bdc9457SAndroid Build Coastguard Worker void* y; 863*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 864*4bdc9457SAndroid Build Coastguard Worker size_t n; 865*4bdc9457SAndroid Build Coastguard Worker size_t m; 866*4bdc9457SAndroid Build Coastguard Worker union { 867*4bdc9457SAndroid Build Coastguard Worker xnn_zipc_ukernel_function fixed_ukernel; 868*4bdc9457SAndroid Build Coastguard Worker xnn_zipv_ukernel_function variable_ukernel; 869*4bdc9457SAndroid Build Coastguard Worker }; 870*4bdc9457SAndroid Build Coastguard Worker }; 871*4bdc9457SAndroid Build Coastguard Worker 872*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 873*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_channel_shuffle_fixed( 874*4bdc9457SAndroid Build Coastguard Worker const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)], 875*4bdc9457SAndroid Build Coastguard Worker size_t index); 876*4bdc9457SAndroid Build Coastguard Worker 877*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_channel_shuffle_variable( 878*4bdc9457SAndroid Build Coastguard Worker const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)], 879*4bdc9457SAndroid Build Coastguard Worker size_t index); 880*4bdc9457SAndroid Build Coastguard Worker #endif 881*4bdc9457SAndroid Build Coastguard Worker 882*4bdc9457SAndroid Build Coastguard Worker struct lut_strided_context { 883*4bdc9457SAndroid Build Coastguard Worker size_t n; 884*4bdc9457SAndroid Build Coastguard Worker const void* x; 885*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 886*4bdc9457SAndroid Build Coastguard Worker const void* t; 887*4bdc9457SAndroid Build Coastguard Worker void* y; 888*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 889*4bdc9457SAndroid Build Coastguard Worker xnn_x8_lut_ukernel_function ukernel; 890*4bdc9457SAndroid Build Coastguard Worker }; 891*4bdc9457SAndroid Build Coastguard Worker 892*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 893*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_lut_strided( 894*4bdc9457SAndroid Build Coastguard Worker const struct lut_strided_context context[restrict XNN_MIN_ELEMENTS(1)], 895*4bdc9457SAndroid Build Coastguard Worker size_t batch_index); 896*4bdc9457SAndroid Build Coastguard Worker #endif 897*4bdc9457SAndroid Build Coastguard Worker 898*4bdc9457SAndroid Build Coastguard Worker struct lut_contiguous_context { 899*4bdc9457SAndroid Build Coastguard Worker const void* x; 900*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 901*4bdc9457SAndroid Build Coastguard Worker const void* t; 902*4bdc9457SAndroid Build Coastguard Worker void* y; 903*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 904*4bdc9457SAndroid Build Coastguard Worker xnn_x8_lut_ukernel_function ukernel; 905*4bdc9457SAndroid Build Coastguard Worker }; 906*4bdc9457SAndroid Build Coastguard Worker 907*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 908*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_lut_contiguous( 909*4bdc9457SAndroid Build Coastguard Worker const struct lut_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)], 910*4bdc9457SAndroid Build Coastguard Worker size_t offset, 911*4bdc9457SAndroid Build Coastguard Worker size_t size); 912*4bdc9457SAndroid Build Coastguard Worker #endif 913*4bdc9457SAndroid Build Coastguard Worker 914*4bdc9457SAndroid Build Coastguard Worker struct univector_strided_context { 915*4bdc9457SAndroid Build Coastguard Worker size_t n; 916*4bdc9457SAndroid Build Coastguard Worker const void* x; 917*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 918*4bdc9457SAndroid Build Coastguard Worker void* y; 919*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 920*4bdc9457SAndroid Build Coastguard Worker xnn_vunary_ukernel_function ukernel; 921*4bdc9457SAndroid Build Coastguard Worker union { 922*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_abs_params f16_abs; 923*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_default_params f16_default; 924*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_f32_cvt_params f16_f32_cvt; 925*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_hswish_params f16_hswish; 926*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_lrelu_params f16_lrelu; 927*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16_minmax; 928*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_neg_params f16_neg; 929*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_sigmoid_params f16_sigmoid; 930*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_abs_params f32_abs; 931*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_default_params f32_default; 932*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_elu_params f32_elu; 933*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_f16_cvt_params f32_f16_cvt; 934*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_hswish_params f32_hswish; 935*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_lrelu_params f32_lrelu; 936*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32_minmax; 937*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_neg_params f32_neg; 938*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_qs8_cvt_params f32_qs8_cvt; 939*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_qu8_cvt_params f32_qu8_cvt; 940*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_rnd_params f32_rnd; 941*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_sigmoid_params f32_sigmoid; 942*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_sqrt_params f32_sqrt; 943*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_cvt_params qs8_cvt; 944*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_f32_cvt_params qs8_f32_cvt; 945*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_lrelu_params qs8_lrelu; 946*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_cvt_params qu8_cvt; 947*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_f32_cvt_params qu8_f32_cvt; 948*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_lrelu_params qu8_lrelu; 949*4bdc9457SAndroid Build Coastguard Worker union xnn_s8_minmax_params s8_minmax; 950*4bdc9457SAndroid Build Coastguard Worker union xnn_u8_minmax_params u8_minmax; 951*4bdc9457SAndroid Build Coastguard Worker } params; 952*4bdc9457SAndroid Build Coastguard Worker }; 953*4bdc9457SAndroid Build Coastguard Worker 954*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 955*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_univector_strided( 956*4bdc9457SAndroid Build Coastguard Worker const struct univector_strided_context context[restrict XNN_MIN_ELEMENTS(1)], 957*4bdc9457SAndroid Build Coastguard Worker size_t batch_index, 958*4bdc9457SAndroid Build Coastguard Worker size_t batch_range); 959*4bdc9457SAndroid Build Coastguard Worker #endif 960*4bdc9457SAndroid Build Coastguard Worker 961*4bdc9457SAndroid Build Coastguard Worker struct univector_contiguous_context { 962*4bdc9457SAndroid Build Coastguard Worker const void* x; 963*4bdc9457SAndroid Build Coastguard Worker void* y; 964*4bdc9457SAndroid Build Coastguard Worker uint16_t log2_xsize; 965*4bdc9457SAndroid Build Coastguard Worker uint16_t log2_ysize; 966*4bdc9457SAndroid Build Coastguard Worker xnn_vunary_ukernel_function ukernel; 967*4bdc9457SAndroid Build Coastguard Worker union { 968*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_abs_params f16_abs; 969*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_default_params f16_default; 970*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_f32_cvt_params f16_f32_cvt; 971*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_hswish_params f16_hswish; 972*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_lrelu_params f16_lrelu; 973*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16_minmax; 974*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_neg_params f16_neg; 975*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_sigmoid_params f16_sigmoid; 976*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_abs_params f32_abs; 977*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_default_params f32_default; 978*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_elu_params f32_elu; 979*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_f16_cvt_params f32_f16_cvt; 980*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_hswish_params f32_hswish; 981*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_lrelu_params f32_lrelu; 982*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32_minmax; 983*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_neg_params f32_neg; 984*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_qs8_cvt_params f32_qs8_cvt; 985*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_qu8_cvt_params f32_qu8_cvt; 986*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_rnd_params f32_rnd; 987*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_sigmoid_params f32_sigmoid; 988*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_sqrt_params f32_sqrt; 989*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_cvt_params qs8_cvt; 990*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_f32_cvt_params qs8_f32_cvt; 991*4bdc9457SAndroid Build Coastguard Worker union xnn_qs8_lrelu_params qs8_lrelu; 992*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_cvt_params qu8_cvt; 993*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_f32_cvt_params qu8_f32_cvt; 994*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_lrelu_params qu8_lrelu; 995*4bdc9457SAndroid Build Coastguard Worker union xnn_s8_minmax_params s8_minmax; 996*4bdc9457SAndroid Build Coastguard Worker union xnn_u8_minmax_params u8_minmax; 997*4bdc9457SAndroid Build Coastguard Worker } params; 998*4bdc9457SAndroid Build Coastguard Worker }; 999*4bdc9457SAndroid Build Coastguard Worker 1000*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1001*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_univector_contiguous( 1002*4bdc9457SAndroid Build Coastguard Worker const struct univector_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)], 1003*4bdc9457SAndroid Build Coastguard Worker size_t offset, 1004*4bdc9457SAndroid Build Coastguard Worker size_t size); 1005*4bdc9457SAndroid Build Coastguard Worker #endif 1006*4bdc9457SAndroid Build Coastguard Worker 1007*4bdc9457SAndroid Build Coastguard Worker struct prelu_context { 1008*4bdc9457SAndroid Build Coastguard Worker size_t n; 1009*4bdc9457SAndroid Build Coastguard Worker const void* x; 1010*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 1011*4bdc9457SAndroid Build Coastguard Worker const void* w; 1012*4bdc9457SAndroid Build Coastguard Worker void* y; 1013*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 1014*4bdc9457SAndroid Build Coastguard Worker xnn_prelu_ukernel_function ukernel; 1015*4bdc9457SAndroid Build Coastguard Worker }; 1016*4bdc9457SAndroid Build Coastguard Worker 1017*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1018*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_prelu( 1019*4bdc9457SAndroid Build Coastguard Worker const struct prelu_context context[restrict XNN_MIN_ELEMENTS(1)], 1020*4bdc9457SAndroid Build Coastguard Worker size_t batch_start, 1021*4bdc9457SAndroid Build Coastguard Worker size_t batch_range); 1022*4bdc9457SAndroid Build Coastguard Worker #endif 1023*4bdc9457SAndroid Build Coastguard Worker 1024*4bdc9457SAndroid Build Coastguard Worker struct vmulcaddc_context { 1025*4bdc9457SAndroid Build Coastguard Worker size_t n; 1026*4bdc9457SAndroid Build Coastguard Worker const void* x; 1027*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 1028*4bdc9457SAndroid Build Coastguard Worker const void* w; 1029*4bdc9457SAndroid Build Coastguard Worker void* y; 1030*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 1031*4bdc9457SAndroid Build Coastguard Worker xnn_vmulcaddc_ukernel_function ukernel; 1032*4bdc9457SAndroid Build Coastguard Worker union { 1033*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16; 1034*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 1035*4bdc9457SAndroid Build Coastguard Worker } params; 1036*4bdc9457SAndroid Build Coastguard Worker }; 1037*4bdc9457SAndroid Build Coastguard Worker 1038*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1039*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_vmulcaddc( 1040*4bdc9457SAndroid Build Coastguard Worker const struct vmulcaddc_context context[restrict XNN_MIN_ELEMENTS(1)], 1041*4bdc9457SAndroid Build Coastguard Worker size_t batch_start, 1042*4bdc9457SAndroid Build Coastguard Worker size_t batch_size); 1043*4bdc9457SAndroid Build Coastguard Worker #endif 1044*4bdc9457SAndroid Build Coastguard Worker 1045*4bdc9457SAndroid Build Coastguard Worker struct pad_context { 1046*4bdc9457SAndroid Build Coastguard Worker const void* input; 1047*4bdc9457SAndroid Build Coastguard Worker size_t input_stride[XNN_MAX_TENSOR_DIMS - 1]; 1048*4bdc9457SAndroid Build Coastguard Worker void* output; 1049*4bdc9457SAndroid Build Coastguard Worker size_t output_stride[XNN_MAX_TENSOR_DIMS - 1]; 1050*4bdc9457SAndroid Build Coastguard Worker size_t pre_paddings[XNN_MAX_TENSOR_DIMS]; 1051*4bdc9457SAndroid Build Coastguard Worker size_t post_paddings[1]; 1052*4bdc9457SAndroid Build Coastguard Worker size_t input_size[XNN_MAX_TENSOR_DIMS]; 1053*4bdc9457SAndroid Build Coastguard Worker size_t output_size[1]; 1054*4bdc9457SAndroid Build Coastguard Worker uint32_t padding_value; 1055*4bdc9457SAndroid Build Coastguard Worker xnn_pad_ukernel_function pad_ukernel; 1056*4bdc9457SAndroid Build Coastguard Worker xnn_fill_ukernel_function fill_ukernel; 1057*4bdc9457SAndroid Build Coastguard Worker }; 1058*4bdc9457SAndroid Build Coastguard Worker 1059*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1060*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_pad_5d( 1061*4bdc9457SAndroid Build Coastguard Worker const struct pad_context context[restrict XNN_MIN_ELEMENTS(1)], 1062*4bdc9457SAndroid Build Coastguard Worker size_t i, size_t j, size_t k, size_t l, size_t m); 1063*4bdc9457SAndroid Build Coastguard Worker #endif 1064*4bdc9457SAndroid Build Coastguard Worker 1065*4bdc9457SAndroid Build Coastguard Worker struct u8_softmax_context { 1066*4bdc9457SAndroid Build Coastguard Worker size_t n; 1067*4bdc9457SAndroid Build Coastguard Worker const uint8_t* x; 1068*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 1069*4bdc9457SAndroid Build Coastguard Worker const uint32_t* t; 1070*4bdc9457SAndroid Build Coastguard Worker uint8_t* y; 1071*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 1072*4bdc9457SAndroid Build Coastguard Worker xnn_u8_rmax_ukernel_function rmax_ukernel; 1073*4bdc9457SAndroid Build Coastguard Worker xnn_u8_lut32norm_ukernel_function lut_norm_ukernel; 1074*4bdc9457SAndroid Build Coastguard Worker }; 1075*4bdc9457SAndroid Build Coastguard Worker 1076*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1077*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_u8_softmax( 1078*4bdc9457SAndroid Build Coastguard Worker const struct u8_softmax_context context[restrict XNN_MIN_ELEMENTS(1)], 1079*4bdc9457SAndroid Build Coastguard Worker size_t batch_index); 1080*4bdc9457SAndroid Build Coastguard Worker #endif 1081*4bdc9457SAndroid Build Coastguard Worker 1082*4bdc9457SAndroid Build Coastguard Worker typedef void (*xnn_compute_reciprocal_function)(const void* input, void* output); 1083*4bdc9457SAndroid Build Coastguard Worker 1084*4bdc9457SAndroid Build Coastguard Worker struct floating_point_softmax_context { 1085*4bdc9457SAndroid Build Coastguard Worker size_t n; 1086*4bdc9457SAndroid Build Coastguard Worker const void* x; 1087*4bdc9457SAndroid Build Coastguard Worker size_t x_stride; 1088*4bdc9457SAndroid Build Coastguard Worker void* y; 1089*4bdc9457SAndroid Build Coastguard Worker size_t y_stride; 1090*4bdc9457SAndroid Build Coastguard Worker xnn_rmax_ukernel_function rmax_ukernel; 1091*4bdc9457SAndroid Build Coastguard Worker xnn_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax_ukernel; 1092*4bdc9457SAndroid Build Coastguard Worker xnn_compute_reciprocal_function compute_reciprocal; 1093*4bdc9457SAndroid Build Coastguard Worker xnn_vbinary_ukernel_function vmulc_ukernel; 1094*4bdc9457SAndroid Build Coastguard Worker union { 1095*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_minmax_params f16; 1096*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_minmax_params f32; 1097*4bdc9457SAndroid Build Coastguard Worker } minmax_params; 1098*4bdc9457SAndroid Build Coastguard Worker union { 1099*4bdc9457SAndroid Build Coastguard Worker union xnn_f16_expminus_params f16; 1100*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_expminus_params f32; 1101*4bdc9457SAndroid Build Coastguard Worker } expminus_params; 1102*4bdc9457SAndroid Build Coastguard Worker }; 1103*4bdc9457SAndroid Build Coastguard Worker 1104*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus 1105*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_floating_point_softmax( 1106*4bdc9457SAndroid Build Coastguard Worker const struct floating_point_softmax_context context[restrict XNN_MIN_ELEMENTS(1)], 1107*4bdc9457SAndroid Build Coastguard Worker size_t batch_index); 1108*4bdc9457SAndroid Build Coastguard Worker #endif 1109