// xref: /aosp_15_r20/external/XNNPACK/src/xnnpack/compute.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once


#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/params.h>


// Tag naming the parallelization shape of a compute task: how many loop
// dimensions it has ("Nd") and how many of the innermost ones are tiled
// ("tile_Md"). Each enumerator shares its suffix with a task member of
// struct compute_parameters (e.g. xnn_parallelization_type_2d_tile_1d and
// task_2d_tile_1d).
enum xnn_parallelization_type {
  // Explicitly zero, so a zero-initialized struct compute_parameters carries
  // the "invalid" tag.
  xnn_parallelization_type_invalid = 0,
  xnn_parallelization_type_1d,
  xnn_parallelization_type_1d_tile_1d,
  xnn_parallelization_type_2d,
  xnn_parallelization_type_2d_tile_1d,
  xnn_parallelization_type_2d_tile_2d,
  xnn_parallelization_type_3d,
  xnn_parallelization_type_3d_tile_2d,
  xnn_parallelization_type_4d,
  xnn_parallelization_type_4d_tile_2d,
  xnn_parallelization_type_5d,
  xnn_parallelization_type_5d_tile_2d,
  xnn_parallelization_type_6d_tile_2d,
#if XNN_MAX_UARCH_TYPES > 1
  // Variants that exist only in builds supporting more than one
  // micro-architecture type.
  xnn_parallelization_type_2d_tile_2d_with_uarch,
  xnn_parallelization_type_3d_tile_2d_with_uarch,
  xnn_parallelization_type_4d_tile_2d_with_uarch,
#endif  // XNN_MAX_UARCH_TYPES > 1
};

39*4bdc9457SAndroid Build Coastguard Worker struct compute_parameters {
40*4bdc9457SAndroid Build Coastguard Worker   enum xnn_parallelization_type type;
41*4bdc9457SAndroid Build Coastguard Worker   union {
42*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_1d_t task_1d;
43*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_1d_tile_1d_t task_1d_tile_1d;
44*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_2d_t task_2d;
45*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_2d_tile_1d_t task_2d_tile_1d;
46*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_2d_tile_2d_t task_2d_tile_2d;
47*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_3d_t task_3d;
48*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_3d_tile_2d_t task_3d_tile_2d;
49*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_4d_t task_4d;
50*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_4d_tile_2d_t task_4d_tile_2d;
51*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_5d_t task_5d;
52*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_5d_tile_2d_t task_5d_tile_2d;
53*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_6d_tile_2d_t task_6d_tile_2d;
54*4bdc9457SAndroid Build Coastguard Worker #if XNN_MAX_UARCH_TYPES > 1
55*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_2d_tile_2d_with_id_t task_2d_tile_2d_with_id;
56*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_3d_tile_2d_with_id_t task_3d_tile_2d_with_id;
57*4bdc9457SAndroid Build Coastguard Worker     pthreadpool_task_4d_tile_2d_with_id_t task_4d_tile_2d_with_id;
58*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_MAX_UARCH_TYPES > 1
59*4bdc9457SAndroid Build Coastguard Worker   };
60*4bdc9457SAndroid Build Coastguard Worker   size_t range[6];
61*4bdc9457SAndroid Build Coastguard Worker   size_t tile[2];
62*4bdc9457SAndroid Build Coastguard Worker };
63*4bdc9457SAndroid Build Coastguard Worker 
64*4bdc9457SAndroid Build Coastguard Worker struct transpose_context {
65*4bdc9457SAndroid Build Coastguard Worker   const void* x;
66*4bdc9457SAndroid Build Coastguard Worker   void* y;
67*4bdc9457SAndroid Build Coastguard Worker   union {
68*4bdc9457SAndroid Build Coastguard Worker     xnn_transposec_ukernel_function const_size_ukernel;
69*4bdc9457SAndroid Build Coastguard Worker     xnn_transposev_ukernel_function variable_size_ukernel;
70*4bdc9457SAndroid Build Coastguard Worker   };
71*4bdc9457SAndroid Build Coastguard Worker   union {
72*4bdc9457SAndroid Build Coastguard Worker     size_t element_size;
73*4bdc9457SAndroid Build Coastguard Worker     size_t log2_element_size;
74*4bdc9457SAndroid Build Coastguard Worker   };
75*4bdc9457SAndroid Build Coastguard Worker   size_t input_stride[XNN_MAX_TENSOR_DIMS];
76*4bdc9457SAndroid Build Coastguard Worker   size_t output_stride[XNN_MAX_TENSOR_DIMS];
77*4bdc9457SAndroid Build Coastguard Worker };
78*4bdc9457SAndroid Build Coastguard Worker 
79*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_2d(
80*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
81*4bdc9457SAndroid Build Coastguard Worker     size_t i,
82*4bdc9457SAndroid Build Coastguard Worker     size_t j,
83*4bdc9457SAndroid Build Coastguard Worker     size_t tile_i,
84*4bdc9457SAndroid Build Coastguard Worker     size_t tile_j);
85*4bdc9457SAndroid Build Coastguard Worker 
86*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_3d(
87*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
88*4bdc9457SAndroid Build Coastguard Worker     size_t i,
89*4bdc9457SAndroid Build Coastguard Worker     size_t j,
90*4bdc9457SAndroid Build Coastguard Worker     size_t k,
91*4bdc9457SAndroid Build Coastguard Worker     size_t tile_j,
92*4bdc9457SAndroid Build Coastguard Worker     size_t tile_k);
93*4bdc9457SAndroid Build Coastguard Worker 
94*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_4d(
95*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
96*4bdc9457SAndroid Build Coastguard Worker     size_t i,
97*4bdc9457SAndroid Build Coastguard Worker     size_t j,
98*4bdc9457SAndroid Build Coastguard Worker     size_t k,
99*4bdc9457SAndroid Build Coastguard Worker     size_t l,
100*4bdc9457SAndroid Build Coastguard Worker     size_t tile_k,
101*4bdc9457SAndroid Build Coastguard Worker     size_t tile_l);
102*4bdc9457SAndroid Build Coastguard Worker 
103*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_5d(
104*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
105*4bdc9457SAndroid Build Coastguard Worker     size_t i,
106*4bdc9457SAndroid Build Coastguard Worker     size_t j,
107*4bdc9457SAndroid Build Coastguard Worker     size_t k,
108*4bdc9457SAndroid Build Coastguard Worker     size_t l,
109*4bdc9457SAndroid Build Coastguard Worker     size_t m,
110*4bdc9457SAndroid Build Coastguard Worker     size_t tile_l,
111*4bdc9457SAndroid Build Coastguard Worker     size_t tile_m);
112*4bdc9457SAndroid Build Coastguard Worker 
113*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposec_6d(
114*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
115*4bdc9457SAndroid Build Coastguard Worker     size_t i,
116*4bdc9457SAndroid Build Coastguard Worker     size_t j,
117*4bdc9457SAndroid Build Coastguard Worker     size_t k,
118*4bdc9457SAndroid Build Coastguard Worker     size_t l,
119*4bdc9457SAndroid Build Coastguard Worker     size_t m,
120*4bdc9457SAndroid Build Coastguard Worker     size_t n,
121*4bdc9457SAndroid Build Coastguard Worker     size_t tile_m,
122*4bdc9457SAndroid Build Coastguard Worker     size_t tile_n);
123*4bdc9457SAndroid Build Coastguard Worker 
124*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_2d(
125*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
126*4bdc9457SAndroid Build Coastguard Worker     size_t i,
127*4bdc9457SAndroid Build Coastguard Worker     size_t j,
128*4bdc9457SAndroid Build Coastguard Worker     size_t tile_i,
129*4bdc9457SAndroid Build Coastguard Worker     size_t tile_j);
130*4bdc9457SAndroid Build Coastguard Worker 
131*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_3d(
132*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
133*4bdc9457SAndroid Build Coastguard Worker     size_t i,
134*4bdc9457SAndroid Build Coastguard Worker     size_t j,
135*4bdc9457SAndroid Build Coastguard Worker     size_t k,
136*4bdc9457SAndroid Build Coastguard Worker     size_t tile_j,
137*4bdc9457SAndroid Build Coastguard Worker     size_t tile_k);
138*4bdc9457SAndroid Build Coastguard Worker 
139*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_4d(
140*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
141*4bdc9457SAndroid Build Coastguard Worker     size_t i,
142*4bdc9457SAndroid Build Coastguard Worker     size_t j,
143*4bdc9457SAndroid Build Coastguard Worker     size_t k,
144*4bdc9457SAndroid Build Coastguard Worker     size_t l,
145*4bdc9457SAndroid Build Coastguard Worker     size_t tile_k,
146*4bdc9457SAndroid Build Coastguard Worker     size_t tile_l);
147*4bdc9457SAndroid Build Coastguard Worker 
148*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_5d(
149*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
150*4bdc9457SAndroid Build Coastguard Worker     size_t i,
151*4bdc9457SAndroid Build Coastguard Worker     size_t j,
152*4bdc9457SAndroid Build Coastguard Worker     size_t k,
153*4bdc9457SAndroid Build Coastguard Worker     size_t l,
154*4bdc9457SAndroid Build Coastguard Worker     size_t m,
155*4bdc9457SAndroid Build Coastguard Worker     size_t tile_l,
156*4bdc9457SAndroid Build Coastguard Worker     size_t tile_m);
157*4bdc9457SAndroid Build Coastguard Worker 
158*4bdc9457SAndroid Build Coastguard Worker XNN_PRIVATE void xnn_compute_transposev_6d(
159*4bdc9457SAndroid Build Coastguard Worker     const struct transpose_context* context,
160*4bdc9457SAndroid Build Coastguard Worker     size_t i,
161*4bdc9457SAndroid Build Coastguard Worker     size_t j,
162*4bdc9457SAndroid Build Coastguard Worker     size_t k,
163*4bdc9457SAndroid Build Coastguard Worker     size_t l,
164*4bdc9457SAndroid Build Coastguard Worker     size_t m,
165*4bdc9457SAndroid Build Coastguard Worker     size_t n,
166*4bdc9457SAndroid Build Coastguard Worker     size_t tile_m,
167*4bdc9457SAndroid Build Coastguard Worker     size_t tile_n);
168*4bdc9457SAndroid Build Coastguard Worker 
169*4bdc9457SAndroid Build Coastguard Worker struct gemm_context {
170*4bdc9457SAndroid Build Coastguard Worker   size_t k_scaled;
171*4bdc9457SAndroid Build Coastguard Worker   const void* a;
172*4bdc9457SAndroid Build Coastguard Worker   size_t a_stride;
173*4bdc9457SAndroid Build Coastguard Worker   const void* packed_w;
174*4bdc9457SAndroid Build Coastguard Worker   size_t w_stride;
175*4bdc9457SAndroid Build Coastguard Worker   size_t wg_stride;
176*4bdc9457SAndroid Build Coastguard Worker   void* c;
177*4bdc9457SAndroid Build Coastguard Worker   size_t cm_stride;
178*4bdc9457SAndroid Build Coastguard Worker   size_t cn_stride;
179*4bdc9457SAndroid Build Coastguard Worker   size_t cg_stride;
180*4bdc9457SAndroid Build Coastguard Worker   uint32_t log2_csize;
181*4bdc9457SAndroid Build Coastguard Worker   struct xnn_hmp_gemm_ukernel ukernel;
182*4bdc9457SAndroid Build Coastguard Worker   void* fused_params;
183*4bdc9457SAndroid Build Coastguard Worker   union {
184*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_conv_minmax_params qs8;
185*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_conv_minmax_params qu8;
186*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
187*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
188*4bdc9457SAndroid Build Coastguard Worker   } params;
189*4bdc9457SAndroid Build Coastguard Worker };
190*4bdc9457SAndroid Build Coastguard Worker 
191*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
192*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_grouped_gemm(
193*4bdc9457SAndroid Build Coastguard Worker       const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)],
194*4bdc9457SAndroid Build Coastguard Worker       size_t group_index,
195*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
196*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
197*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
198*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
199*4bdc9457SAndroid Build Coastguard Worker 
200*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_gemm(
201*4bdc9457SAndroid Build Coastguard Worker       const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)],
202*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
203*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
204*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
205*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
206*4bdc9457SAndroid Build Coastguard Worker 
207*4bdc9457SAndroid Build Coastguard Worker   #if XNN_MAX_UARCH_TYPES > 1
208*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_hmp_grouped_gemm(
209*4bdc9457SAndroid Build Coastguard Worker         const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)],
210*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
211*4bdc9457SAndroid Build Coastguard Worker         size_t group_index,
212*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
213*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
214*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
215*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
216*4bdc9457SAndroid Build Coastguard Worker 
217*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_hmp_gemm(
218*4bdc9457SAndroid Build Coastguard Worker         const struct gemm_context context[restrict XNN_MIN_ELEMENTS(1)],
219*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
220*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
221*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
222*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
223*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
224*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_MAX_UARCH_TYPES > 1
225*4bdc9457SAndroid Build Coastguard Worker #endif
226*4bdc9457SAndroid Build Coastguard Worker 
227*4bdc9457SAndroid Build Coastguard Worker // Context for Sparse Matrix-Dense Matrix Multiplication.
228*4bdc9457SAndroid Build Coastguard Worker // C [MxN] := A [MxK] * B [KxN] + bias [N]
229*4bdc9457SAndroid Build Coastguard Worker // A and C are dense matrices with row-major storage, B is a sparse matrix.
230*4bdc9457SAndroid Build Coastguard Worker struct spmm_context {
231*4bdc9457SAndroid Build Coastguard Worker   // N dimension of the B and C matrices.
232*4bdc9457SAndroid Build Coastguard Worker   // Corresponds to number of output channels in 1x1 convolution.
233*4bdc9457SAndroid Build Coastguard Worker   size_t n;
234*4bdc9457SAndroid Build Coastguard Worker   // M dimension of the A and C matrices, pre-scaled by sizeof(element size).
235*4bdc9457SAndroid Build Coastguard Worker   // Corresponds to the stride, in bytes, between adjacent rows of C matrix.
236*4bdc9457SAndroid Build Coastguard Worker   size_t scaled_m;
237*4bdc9457SAndroid Build Coastguard Worker   // Input matrix A.
238*4bdc9457SAndroid Build Coastguard Worker   const void* input;
239*4bdc9457SAndroid Build Coastguard Worker   // Packed bias elements and non-zero filter elements.
240*4bdc9457SAndroid Build Coastguard Worker   const void* nonzero_weights;
241*4bdc9457SAndroid Build Coastguard Worker   // Input pointer increments, in bytes, after each processed non-zero weight.
242*4bdc9457SAndroid Build Coastguard Worker   const int32_t* input_increments;
243*4bdc9457SAndroid Build Coastguard Worker   // Number of non-zero filter elements per each N (output channel) dimension.
244*4bdc9457SAndroid Build Coastguard Worker   const uint32_t* output_channel_nonzeros;
245*4bdc9457SAndroid Build Coastguard Worker   // Output matrix C.
246*4bdc9457SAndroid Build Coastguard Worker   void* output;
247*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between matrices A corresponding to different images in batched 1x1 Convolution
248*4bdc9457SAndroid Build Coastguard Worker   size_t batched_input_stride;
249*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between matrices C corresponding to different images in batched 1x1 Convolution
250*4bdc9457SAndroid Build Coastguard Worker   size_t batched_output_stride;
251*4bdc9457SAndroid Build Coastguard Worker   // Micro-kernel function pointer.
252*4bdc9457SAndroid Build Coastguard Worker   xnn_spmm_ukernel_function ukernel;
253*4bdc9457SAndroid Build Coastguard Worker   // Output activation parameters.
254*4bdc9457SAndroid Build Coastguard Worker   union {
255*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
256*4bdc9457SAndroid Build Coastguard Worker   } params;
257*4bdc9457SAndroid Build Coastguard Worker };
258*4bdc9457SAndroid Build Coastguard Worker 
259*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
260*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_spmm(
261*4bdc9457SAndroid Build Coastguard Worker     const struct spmm_context context[restrict XNN_MIN_ELEMENTS(1)],
262*4bdc9457SAndroid Build Coastguard Worker     size_t batch_index,
263*4bdc9457SAndroid Build Coastguard Worker     size_t mr_block_start,
264*4bdc9457SAndroid Build Coastguard Worker     size_t mr_block_size);
265*4bdc9457SAndroid Build Coastguard Worker #endif
266*4bdc9457SAndroid Build Coastguard Worker 
267*4bdc9457SAndroid Build Coastguard Worker struct igemm_context {
268*4bdc9457SAndroid Build Coastguard Worker   size_t ks;
269*4bdc9457SAndroid Build Coastguard Worker   size_t ks_scaled;
270*4bdc9457SAndroid Build Coastguard Worker   size_t kc;
271*4bdc9457SAndroid Build Coastguard Worker   size_t w_stride;
272*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_a;
273*4bdc9457SAndroid Build Coastguard Worker   size_t a_offset;
274*4bdc9457SAndroid Build Coastguard Worker   void* zero;
275*4bdc9457SAndroid Build Coastguard Worker   const void* packed_w;
276*4bdc9457SAndroid Build Coastguard Worker   void* c;
277*4bdc9457SAndroid Build Coastguard Worker   size_t cm_stride;
278*4bdc9457SAndroid Build Coastguard Worker   size_t cn_stride;
279*4bdc9457SAndroid Build Coastguard Worker   size_t ga_stride;
280*4bdc9457SAndroid Build Coastguard Worker   size_t gw_stride;
281*4bdc9457SAndroid Build Coastguard Worker   size_t gc_stride;
282*4bdc9457SAndroid Build Coastguard Worker   size_t ba_stride;
283*4bdc9457SAndroid Build Coastguard Worker   size_t bc_stride;
284*4bdc9457SAndroid Build Coastguard Worker   uint32_t log2_csize;
285*4bdc9457SAndroid Build Coastguard Worker   struct xnn_hmp_igemm_ukernel ukernel;
286*4bdc9457SAndroid Build Coastguard Worker   union {
287*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_conv_minmax_params qs8;
288*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_conv_minmax_params qu8;
289*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
290*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
291*4bdc9457SAndroid Build Coastguard Worker   } params;
292*4bdc9457SAndroid Build Coastguard Worker };
293*4bdc9457SAndroid Build Coastguard Worker 
294*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
295*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_grouped_igemm(
296*4bdc9457SAndroid Build Coastguard Worker       const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
297*4bdc9457SAndroid Build Coastguard Worker       size_t group_index,
298*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
299*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
300*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
301*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
302*4bdc9457SAndroid Build Coastguard Worker 
303*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_grouped_batch_igemm(
304*4bdc9457SAndroid Build Coastguard Worker       const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
305*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
306*4bdc9457SAndroid Build Coastguard Worker       size_t group_index,
307*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
308*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
309*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
310*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
311*4bdc9457SAndroid Build Coastguard Worker 
312*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_igemm(
313*4bdc9457SAndroid Build Coastguard Worker       const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
314*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
315*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
316*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
317*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
318*4bdc9457SAndroid Build Coastguard Worker 
319*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_batch_igemm(
320*4bdc9457SAndroid Build Coastguard Worker       const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
321*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
322*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_start,
323*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
324*4bdc9457SAndroid Build Coastguard Worker       size_t mr_block_size,
325*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
326*4bdc9457SAndroid Build Coastguard Worker 
327*4bdc9457SAndroid Build Coastguard Worker   #if XNN_MAX_UARCH_TYPES > 1
328*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_hmp_grouped_igemm(
329*4bdc9457SAndroid Build Coastguard Worker         const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
330*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
331*4bdc9457SAndroid Build Coastguard Worker         size_t group_index,
332*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
333*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
334*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
335*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
336*4bdc9457SAndroid Build Coastguard Worker 
337*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_hmp_grouped_batch_igemm(
338*4bdc9457SAndroid Build Coastguard Worker         const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
339*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
340*4bdc9457SAndroid Build Coastguard Worker         size_t batch_index,
341*4bdc9457SAndroid Build Coastguard Worker         size_t group_index,
342*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
343*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
344*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
345*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
346*4bdc9457SAndroid Build Coastguard Worker 
347*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_hmp_igemm(
348*4bdc9457SAndroid Build Coastguard Worker         const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
349*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
350*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
351*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
352*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
353*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
354*4bdc9457SAndroid Build Coastguard Worker 
355*4bdc9457SAndroid Build Coastguard Worker     XNN_PRIVATE void xnn_compute_batch_hmp_igemm(
356*4bdc9457SAndroid Build Coastguard Worker         const struct igemm_context context[restrict XNN_MIN_ELEMENTS(1)],
357*4bdc9457SAndroid Build Coastguard Worker         uint32_t uarch_index,
358*4bdc9457SAndroid Build Coastguard Worker         size_t batch_index,
359*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_start,
360*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_start,
361*4bdc9457SAndroid Build Coastguard Worker         size_t mr_block_size,
362*4bdc9457SAndroid Build Coastguard Worker         size_t nr_block_size);
363*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_MAX_UARCH_TYPES > 1
364*4bdc9457SAndroid Build Coastguard Worker #endif
365*4bdc9457SAndroid Build Coastguard Worker 
366*4bdc9457SAndroid Build Coastguard Worker struct subgemm_context {
367*4bdc9457SAndroid Build Coastguard Worker   const struct subconvolution_params* subconvolution_params;
368*4bdc9457SAndroid Build Coastguard Worker   size_t kc;
369*4bdc9457SAndroid Build Coastguard Worker   const void* a;
370*4bdc9457SAndroid Build Coastguard Worker   size_t ax_stride;
371*4bdc9457SAndroid Build Coastguard Worker   size_t ay_stride;
372*4bdc9457SAndroid Build Coastguard Worker   size_t cx_stride;
373*4bdc9457SAndroid Build Coastguard Worker   size_t cy_stride;
374*4bdc9457SAndroid Build Coastguard Worker   size_t cn_stride;
375*4bdc9457SAndroid Build Coastguard Worker   size_t ga_stride;
376*4bdc9457SAndroid Build Coastguard Worker   size_t gw_stride;
377*4bdc9457SAndroid Build Coastguard Worker   size_t gc_stride;
378*4bdc9457SAndroid Build Coastguard Worker   size_t ba_stride;
379*4bdc9457SAndroid Build Coastguard Worker   size_t bc_stride;
380*4bdc9457SAndroid Build Coastguard Worker   uint32_t log2_csize;
381*4bdc9457SAndroid Build Coastguard Worker   struct xnn_hmp_gemm_ukernel ukernel;
382*4bdc9457SAndroid Build Coastguard Worker   union {
383*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_conv_minmax_params qs8;
384*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_conv_minmax_params qu8;
385*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
386*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
387*4bdc9457SAndroid Build Coastguard Worker   } params;
388*4bdc9457SAndroid Build Coastguard Worker };
389*4bdc9457SAndroid Build Coastguard Worker 
390*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
391*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_grouped_subgemm2d(
392*4bdc9457SAndroid Build Coastguard Worker       const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)],
393*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
394*4bdc9457SAndroid Build Coastguard Worker       size_t group_index,
395*4bdc9457SAndroid Build Coastguard Worker       size_t subkernel_index,
396*4bdc9457SAndroid Build Coastguard Worker       size_t slice_y,
397*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_start,
398*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
399*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_max,
400*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
401*4bdc9457SAndroid Build Coastguard Worker 
402*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_subgemm2d(
403*4bdc9457SAndroid Build Coastguard Worker       const struct subgemm_context context[restrict XNN_MIN_ELEMENTS(1)],
404*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
405*4bdc9457SAndroid Build Coastguard Worker       size_t subkernel_index,
406*4bdc9457SAndroid Build Coastguard Worker       size_t slice_y,
407*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_start,
408*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
409*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_max,
410*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
411*4bdc9457SAndroid Build Coastguard Worker #endif
412*4bdc9457SAndroid Build Coastguard Worker 
413*4bdc9457SAndroid Build Coastguard Worker struct subconv_context {
414*4bdc9457SAndroid Build Coastguard Worker   const struct subconvolution_params* subconvolution_params;
415*4bdc9457SAndroid Build Coastguard Worker   size_t kc;
416*4bdc9457SAndroid Build Coastguard Worker   size_t a_offset;
417*4bdc9457SAndroid Build Coastguard Worker   void* zero;
418*4bdc9457SAndroid Build Coastguard Worker   size_t cx_stride;
419*4bdc9457SAndroid Build Coastguard Worker   size_t cy_stride;
420*4bdc9457SAndroid Build Coastguard Worker   size_t cn_stride;
421*4bdc9457SAndroid Build Coastguard Worker   size_t ga_stride;
422*4bdc9457SAndroid Build Coastguard Worker   size_t gw_stride;
423*4bdc9457SAndroid Build Coastguard Worker   size_t gc_stride;
424*4bdc9457SAndroid Build Coastguard Worker   size_t ba_stride;
425*4bdc9457SAndroid Build Coastguard Worker   size_t bc_stride;
426*4bdc9457SAndroid Build Coastguard Worker   uint32_t log2_csize;
427*4bdc9457SAndroid Build Coastguard Worker   struct xnn_hmp_igemm_ukernel ukernel;
428*4bdc9457SAndroid Build Coastguard Worker   union {
429*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_conv_minmax_params qs8;
430*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_conv_minmax_params qu8;
431*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
432*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
433*4bdc9457SAndroid Build Coastguard Worker   } params;
434*4bdc9457SAndroid Build Coastguard Worker };
435*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take a subconv_context. They are declared for C
// translation units only, presumably because the `context[restrict ...]`
// array-parameter declarator is C99 syntax that C++ does not accept.
// The grouped variant takes one extra argument (group_index); otherwise the
// two parameter lists are identical.
// NOTE(review): judging by the names, slice_x_start / nr_block_start are tile
// origins and slice_x_max / nr_block_size the matching tile extents -- confirm
// against the implementation.
436*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
437*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_grouped_subconv2d(
438*4bdc9457SAndroid Build Coastguard Worker       const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)],
439*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
440*4bdc9457SAndroid Build Coastguard Worker       size_t group_index,
441*4bdc9457SAndroid Build Coastguard Worker       size_t subkernel_index,
442*4bdc9457SAndroid Build Coastguard Worker       size_t slice_y,
443*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_start,
444*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
445*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_max,
446*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
447*4bdc9457SAndroid Build Coastguard Worker 
448*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_subconv2d(
449*4bdc9457SAndroid Build Coastguard Worker       const struct subconv_context context[restrict XNN_MIN_ELEMENTS(1)],
450*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
451*4bdc9457SAndroid Build Coastguard Worker       size_t subkernel_index,
452*4bdc9457SAndroid Build Coastguard Worker       size_t slice_y,
453*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_start,
454*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_start,
455*4bdc9457SAndroid Build Coastguard Worker       size_t slice_x_max,
456*4bdc9457SAndroid Build Coastguard Worker       size_t nr_block_size);
457*4bdc9457SAndroid Build Coastguard Worker #endif
458*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_conv2d_hwc2chw (it is that function's
// `context` parameter type): input/output pointers, image dimensions, strides,
// the micro-kernel pointer and its parameters.
// NOTE(review): units of the *_stride fields (bytes vs. elements) are not
// stated here -- confirm against the implementation.
459*4bdc9457SAndroid Build Coastguard Worker struct conv2d_context {
460*4bdc9457SAndroid Build Coastguard Worker   size_t input_height;
461*4bdc9457SAndroid Build Coastguard Worker   size_t input_width;
462*4bdc9457SAndroid Build Coastguard Worker   const void* input;
463*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
464*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
465*4bdc9457SAndroid Build Coastguard Worker   const void* packed_weights;
466*4bdc9457SAndroid Build Coastguard Worker   void* output;
467*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
468*4bdc9457SAndroid Build Coastguard Worker   size_t input_padding_top;
469*4bdc9457SAndroid Build Coastguard Worker   size_t output_channels;
470*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
471*4bdc9457SAndroid Build Coastguard Worker   size_t output_channel_stride;
  // Anonymous union: its single member is accessed directly on the context
  // (context->hwc2chw_ukernel).
472*4bdc9457SAndroid Build Coastguard Worker   union {
473*4bdc9457SAndroid Build Coastguard Worker     xnn_conv_hwc2chw_ukernel_function hwc2chw_ukernel;
474*4bdc9457SAndroid Build Coastguard Worker   };
  // Micro-kernel parameters; only an f32 variant is declared.
475*4bdc9457SAndroid Build Coastguard Worker   union {
476*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
477*4bdc9457SAndroid Build Coastguard Worker   } params;
478*4bdc9457SAndroid Build Coastguard Worker };
479*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes a conv2d_context. Declared for C translation
// units only, presumably because the `context[restrict ...]` array-parameter
// declarator is C99 syntax that C++ does not accept.
// NOTE(review): output_y_start / output_y_slice presumably select a range of
// output rows within batch element `batch_index` -- confirm.
480*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
481*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_conv2d_hwc2chw(
482*4bdc9457SAndroid Build Coastguard Worker       const struct conv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
483*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
484*4bdc9457SAndroid Build Coastguard Worker       size_t output_y_start,
485*4bdc9457SAndroid Build Coastguard Worker       size_t output_y_slice);
486*4bdc9457SAndroid Build Coastguard Worker #endif
487*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_dwconv_unipass (it is that function's
// `context` parameter type).
// NOTE(review): indirect_input / input_offset presumably play the same role as
// the identically named, documented fields of resize_bilinear_context (an
// indirection buffer of input pointers plus a byte offset) -- confirm.
488*4bdc9457SAndroid Build Coastguard Worker struct dwconv_context {
489*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
490*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_width_stride;
491*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_height_stride;
492*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
493*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
494*4bdc9457SAndroid Build Coastguard Worker   const void* packed_weights;
495*4bdc9457SAndroid Build Coastguard Worker   void* output;
496*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
497*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
498*4bdc9457SAndroid Build Coastguard Worker   size_t output_width;
499*4bdc9457SAndroid Build Coastguard Worker   size_t groups;
500*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
501*4bdc9457SAndroid Build Coastguard Worker   size_t output_increment;
  // Micro-kernel parameters. This is a union, so only one of the per-datatype
  // members (qs8 / qu8 / f16 / f32) is stored at a time.
502*4bdc9457SAndroid Build Coastguard Worker   union {
503*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_conv_minmax_params qs8;
504*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_conv_minmax_params qu8;
505*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16;
506*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
507*4bdc9457SAndroid Build Coastguard Worker   } params;
  // Anonymous union: its single member is accessed directly on the context
  // (context->unipass_ukernel).
508*4bdc9457SAndroid Build Coastguard Worker   union {
509*4bdc9457SAndroid Build Coastguard Worker     xnn_dwconv_unipass_ukernel_function unipass_ukernel;
510*4bdc9457SAndroid Build Coastguard Worker   };
511*4bdc9457SAndroid Build Coastguard Worker };
512*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes a dwconv_context. Declared for C translation
// units only, presumably because the `context[restrict ...]` array-parameter
// declarator is C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles output row `output_y` of batch
// element `batch_index` -- confirm.
513*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
514*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_dwconv_unipass(
515*4bdc9457SAndroid Build Coastguard Worker       const struct dwconv_context context[restrict XNN_MIN_ELEMENTS(1)],
516*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
517*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
518*4bdc9457SAndroid Build Coastguard Worker #endif
519*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_dwconv2d_chw (it is that function's
// `context` parameter type).
// Note that input_padding_top is a uint32_t here, whereas the field of the
// same name in conv2d_context is a size_t.
// NOTE(review): units of the *_stride fields are not stated here -- confirm.
520*4bdc9457SAndroid Build Coastguard Worker struct dwconv2d_context {
521*4bdc9457SAndroid Build Coastguard Worker   size_t input_height;
522*4bdc9457SAndroid Build Coastguard Worker   size_t input_width;
523*4bdc9457SAndroid Build Coastguard Worker   const void* input;
524*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
525*4bdc9457SAndroid Build Coastguard Worker   uint32_t input_padding_top;
526*4bdc9457SAndroid Build Coastguard Worker   size_t input_channel_stride;
527*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
528*4bdc9457SAndroid Build Coastguard Worker   const void* packed_weights;
529*4bdc9457SAndroid Build Coastguard Worker   size_t weights_channel_stride;
530*4bdc9457SAndroid Build Coastguard Worker   void* output;
531*4bdc9457SAndroid Build Coastguard Worker   size_t output_channel_stride;
532*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
  // Micro-kernel parameters; only an f32 variant is declared.
533*4bdc9457SAndroid Build Coastguard Worker   union {
534*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_chw_params f32;
535*4bdc9457SAndroid Build Coastguard Worker   } params;
  // Anonymous union: its single member is accessed directly on the context
  // (context->chw_ukernel).
536*4bdc9457SAndroid Build Coastguard Worker   union {
537*4bdc9457SAndroid Build Coastguard Worker     xnn_dwconv2d_chw_ukernel_function chw_ukernel;
538*4bdc9457SAndroid Build Coastguard Worker   };
539*4bdc9457SAndroid Build Coastguard Worker };
540*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes a dwconv2d_context. Declared for C
// translation units only, presumably because the `context[restrict ...]`
// array-parameter declarator is C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles channel `channel` of batch element
// `batch_index` -- confirm.
541*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
542*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_dwconv2d_chw(
543*4bdc9457SAndroid Build Coastguard Worker       const struct dwconv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
544*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
545*4bdc9457SAndroid Build Coastguard Worker       size_t channel);
546*4bdc9457SAndroid Build Coastguard Worker #endif
547*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_max_pooling (it is that function's `context`
// parameter type).
// NOTE(review): indirect_input / input_offset presumably play the same role as
// the identically named, documented fields of resize_bilinear_context -- confirm.
548*4bdc9457SAndroid Build Coastguard Worker struct max_pooling_context {
549*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
550*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_height_stride;
551*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
552*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
553*4bdc9457SAndroid Build Coastguard Worker   void* output;
554*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
555*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
556*4bdc9457SAndroid Build Coastguard Worker   size_t output_width;
557*4bdc9457SAndroid Build Coastguard Worker   size_t pooling_size;
558*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
559*4bdc9457SAndroid Build Coastguard Worker   size_t input_increment;
560*4bdc9457SAndroid Build Coastguard Worker   size_t output_increment;
  // Micro-kernel parameters. This is a union, so only one of the u8 / f32
  // members is stored at a time.
561*4bdc9457SAndroid Build Coastguard Worker   union {
562*4bdc9457SAndroid Build Coastguard Worker     union xnn_u8_minmax_params u8;
563*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
564*4bdc9457SAndroid Build Coastguard Worker   } params;
565*4bdc9457SAndroid Build Coastguard Worker   xnn_maxpool_ukernel_function ukernel;
566*4bdc9457SAndroid Build Coastguard Worker };
567*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes a max_pooling_context. Declared for C
// translation units only, presumably because the `context[restrict ...]`
// array-parameter declarator is C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles output row `output_y` of batch
// element `batch_index` -- confirm.
568*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
569*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_max_pooling(
570*4bdc9457SAndroid Build Coastguard Worker       const struct max_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
571*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
572*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
573*4bdc9457SAndroid Build Coastguard Worker #endif
574*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_unpooling (it is that function's `context`
// parameter type). It carries an input pointer, a uint32_t index pointer and
// an indirection pointer for the output, each with height and width strides.
// NOTE(review): indirect_output is declared `const void**` even though its
// name suggests the pointed-to buffers are written -- confirm this is intended.
// NOTE(review): fill_value is presumably the value stored in output positions
// not selected by `index` -- confirm.
575*4bdc9457SAndroid Build Coastguard Worker struct unpooling_context {
576*4bdc9457SAndroid Build Coastguard Worker   const void* input;
577*4bdc9457SAndroid Build Coastguard Worker   size_t input_height_stride;
578*4bdc9457SAndroid Build Coastguard Worker   size_t input_width_stride;
579*4bdc9457SAndroid Build Coastguard Worker   const uint32_t* index;
580*4bdc9457SAndroid Build Coastguard Worker   size_t index_height_stride;
581*4bdc9457SAndroid Build Coastguard Worker   size_t index_width_stride;
582*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_output;
583*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_output_height_stride;
584*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_output_width_stride;
585*4bdc9457SAndroid Build Coastguard Worker   size_t pooling_size;
586*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
587*4bdc9457SAndroid Build Coastguard Worker   uint32_t fill_value;
588*4bdc9457SAndroid Build Coastguard Worker   xnn_unpool_ukernel_function ukernel;
589*4bdc9457SAndroid Build Coastguard Worker };
590*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes an unpooling_context. Declared for C
// translation units only, presumably because the `context[restrict ...]`
// array-parameter declarator is C99 syntax that C++ does not accept.
// Unlike the pooling entry points it takes no batch index, only input
// coordinates (input_y, input_x).
591*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
592*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_unpooling(
593*4bdc9457SAndroid Build Coastguard Worker       const struct unpooling_context context[restrict XNN_MIN_ELEMENTS(1)],
594*4bdc9457SAndroid Build Coastguard Worker       size_t input_y,
595*4bdc9457SAndroid Build Coastguard Worker       size_t input_x);
596*4bdc9457SAndroid Build Coastguard Worker #endif
597*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle shared by xnn_compute_argmax_pooling_unipass and
// xnn_compute_argmax_pooling_multipass (it is their `context` parameter type).
// Besides the output pointer it carries a separate uint32_t `index` pointer
// with its own batch and height strides.
// NOTE(review): indirect_input / input_offset presumably play the same role as
// the identically named, documented fields of resize_bilinear_context -- confirm.
598*4bdc9457SAndroid Build Coastguard Worker struct argmax_pooling_context {
599*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
600*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_height_stride;
601*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
602*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
603*4bdc9457SAndroid Build Coastguard Worker   void* output;
604*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
605*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
606*4bdc9457SAndroid Build Coastguard Worker   size_t output_width;
607*4bdc9457SAndroid Build Coastguard Worker   uint32_t* index;
608*4bdc9457SAndroid Build Coastguard Worker   size_t index_batch_stride;
609*4bdc9457SAndroid Build Coastguard Worker   size_t index_height_stride;
610*4bdc9457SAndroid Build Coastguard Worker   size_t pooling_size;
611*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
612*4bdc9457SAndroid Build Coastguard Worker   size_t input_increment;
613*4bdc9457SAndroid Build Coastguard Worker   size_t output_increment;
  // Anonymous union: the two function pointers share storage, and this struct
  // has no field recording which one is set. Presumably the context must be
  // paired with the matching _unipass / _multipass compute function.
614*4bdc9457SAndroid Build Coastguard Worker   union {
615*4bdc9457SAndroid Build Coastguard Worker     xnn_argmaxpool_unipass_ukernel_function unipass_ukernel;
616*4bdc9457SAndroid Build Coastguard Worker     xnn_argmaxpool_multipass_ukernel_function multipass_ukernel;
617*4bdc9457SAndroid Build Coastguard Worker   };
618*4bdc9457SAndroid Build Coastguard Worker };
619*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take an argmax_pooling_context; the two variants
// have identical parameter lists. Declared for C translation units only,
// presumably because the `context[restrict ...]` array-parameter declarator is
// C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles output row `output_y` of batch
// element `batch_index` -- confirm.
620*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
621*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_argmax_pooling_unipass(
622*4bdc9457SAndroid Build Coastguard Worker       const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
623*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
624*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
625*4bdc9457SAndroid Build Coastguard Worker 
626*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_argmax_pooling_multipass(
627*4bdc9457SAndroid Build Coastguard Worker       const struct argmax_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
628*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
629*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
630*4bdc9457SAndroid Build Coastguard Worker #endif
631*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle shared by xnn_compute_average_pooling_unipass and
// xnn_compute_average_pooling_multipass (it is their `context` parameter type).
// NOTE(review): indirect_input / input_offset presumably play the same role as
// the identically named, documented fields of resize_bilinear_context -- confirm.
632*4bdc9457SAndroid Build Coastguard Worker struct average_pooling_context {
633*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
634*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_height_stride;
635*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
636*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
637*4bdc9457SAndroid Build Coastguard Worker   void* output;
638*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
639*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
640*4bdc9457SAndroid Build Coastguard Worker   size_t output_width;
641*4bdc9457SAndroid Build Coastguard Worker   size_t pooling_size;
642*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
643*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
644*4bdc9457SAndroid Build Coastguard Worker   size_t input_increment;
645*4bdc9457SAndroid Build Coastguard Worker   size_t output_increment;
  // Micro-kernel parameters. This is a union, so only one of the f16 / f32 /
  // qu8 members is stored at a time.
646*4bdc9457SAndroid Build Coastguard Worker   union {
647*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
648*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_scaleminmax_params f32;
649*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_avgpool_minmax_params qu8;
650*4bdc9457SAndroid Build Coastguard Worker   } params;
  // Anonymous union: the two function pointers share storage, and this struct
  // has no field recording which one is set. Presumably the context must be
  // paired with the matching _unipass / _multipass compute function.
651*4bdc9457SAndroid Build Coastguard Worker   union {
652*4bdc9457SAndroid Build Coastguard Worker     xnn_avgpool_unipass_ukernel_function unipass_ukernel;
653*4bdc9457SAndroid Build Coastguard Worker     xnn_avgpool_multipass_ukernel_function multipass_ukernel;
654*4bdc9457SAndroid Build Coastguard Worker   };
655*4bdc9457SAndroid Build Coastguard Worker };
656*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take an average_pooling_context; the two variants
// have identical parameter lists. Declared for C translation units only,
// presumably because the `context[restrict ...]` array-parameter declarator is
// C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles output row `output_y` of batch
// element `batch_index` -- confirm.
657*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
658*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_average_pooling_unipass(
659*4bdc9457SAndroid Build Coastguard Worker       const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
660*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
661*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
662*4bdc9457SAndroid Build Coastguard Worker 
663*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_average_pooling_multipass(
664*4bdc9457SAndroid Build Coastguard Worker       const struct average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
665*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
666*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
667*4bdc9457SAndroid Build Coastguard Worker #endif
668*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle shared by xnn_compute_pixelwise_average_pooling_unipass and
// xnn_compute_pixelwise_average_pooling_multipass (it is their `context`
// parameter type). Compared with average_pooling_context it adds
// pixelwise_buffer and pixelwise_buffer_height_stride, and its params union
// uses the *_minmax_params types instead of the scale/avgpool variants.
// NOTE(review): pixelwise_buffer presumably holds per-output-pixel scaling
// factors -- confirm against the implementation.
669*4bdc9457SAndroid Build Coastguard Worker struct pixelwise_average_pooling_context {
670*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
671*4bdc9457SAndroid Build Coastguard Worker   size_t indirect_input_height_stride;
672*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
673*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
674*4bdc9457SAndroid Build Coastguard Worker   const void* pixelwise_buffer;
675*4bdc9457SAndroid Build Coastguard Worker   size_t pixelwise_buffer_height_stride;
676*4bdc9457SAndroid Build Coastguard Worker   void* output;
677*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
678*4bdc9457SAndroid Build Coastguard Worker   size_t output_height_stride;
679*4bdc9457SAndroid Build Coastguard Worker   size_t output_width;
680*4bdc9457SAndroid Build Coastguard Worker   size_t pooling_size;
681*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
682*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
683*4bdc9457SAndroid Build Coastguard Worker   size_t input_increment;
684*4bdc9457SAndroid Build Coastguard Worker   size_t output_increment;
  // Micro-kernel parameters. This is a union, so only one of the f16 / f32 /
  // u8 members is stored at a time.
685*4bdc9457SAndroid Build Coastguard Worker   union {
686*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16;
687*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
688*4bdc9457SAndroid Build Coastguard Worker     union xnn_u8_minmax_params u8;
689*4bdc9457SAndroid Build Coastguard Worker   } params;
  // Anonymous union: the two function pointers share storage, and this struct
  // has no field recording which one is set. Presumably the context must be
  // paired with the matching _unipass / _multipass compute function.
690*4bdc9457SAndroid Build Coastguard Worker   union {
691*4bdc9457SAndroid Build Coastguard Worker     xnn_pavgpool_unipass_ukernel_function unipass_ukernel;
692*4bdc9457SAndroid Build Coastguard Worker     xnn_pavgpool_multipass_ukernel_function multipass_ukernel;
693*4bdc9457SAndroid Build Coastguard Worker   };
694*4bdc9457SAndroid Build Coastguard Worker };
695*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take a pixelwise_average_pooling_context; the two
// variants have identical parameter lists. Declared for C translation units
// only, presumably because the `context[restrict ...]` array-parameter
// declarator is C99 syntax that C++ does not accept.
// NOTE(review): one call presumably handles output row `output_y` of batch
// element `batch_index` -- confirm.
696*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
697*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_pixelwise_average_pooling_unipass(
698*4bdc9457SAndroid Build Coastguard Worker       const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
699*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
700*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
701*4bdc9457SAndroid Build Coastguard Worker 
702*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_pixelwise_average_pooling_multipass(
703*4bdc9457SAndroid Build Coastguard Worker       const struct pixelwise_average_pooling_context context[restrict XNN_MIN_ELEMENTS(1)],
704*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
705*4bdc9457SAndroid Build Coastguard Worker       size_t output_y);
706*4bdc9457SAndroid Build Coastguard Worker #endif
707*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle shared by xnn_compute_global_average_pooling_nwc_unipass and
// xnn_compute_global_average_pooling_nwc_multipass (it is their `context`
// parameter type).
// NOTE(review): units of the *_stride fields and of input_elements are not
// stated here -- confirm against the implementation.
708*4bdc9457SAndroid Build Coastguard Worker struct global_average_pooling_nwc_context {
709*4bdc9457SAndroid Build Coastguard Worker   const void* input;
710*4bdc9457SAndroid Build Coastguard Worker   const void* zero;
711*4bdc9457SAndroid Build Coastguard Worker   size_t input_pixel_stride;
712*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
713*4bdc9457SAndroid Build Coastguard Worker   size_t input_elements;
714*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
715*4bdc9457SAndroid Build Coastguard Worker   void* output;
716*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
  // Micro-kernel parameters. This is a union, so only one of the per-datatype
  // members (qs8 / qu8 / f16 / f32) is stored at a time.
717*4bdc9457SAndroid Build Coastguard Worker   union {
718*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_avgpool_minmax_params qs8;
719*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_avgpool_minmax_params qu8;
720*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_scaleminmax_params f16;
721*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_scaleminmax_params f32;
722*4bdc9457SAndroid Build Coastguard Worker   } params;
  // Anonymous union: the two function pointers share storage, and this struct
  // has no field recording which one is set. Presumably the context must be
  // paired with the matching _unipass / _multipass compute function.
723*4bdc9457SAndroid Build Coastguard Worker   union {
724*4bdc9457SAndroid Build Coastguard Worker     xnn_gavgpool_unipass_ukernel_function unipass_ukernel;
725*4bdc9457SAndroid Build Coastguard Worker     xnn_gavgpool_multipass_ukernel_function multipass_ukernel;
726*4bdc9457SAndroid Build Coastguard Worker   };
727*4bdc9457SAndroid Build Coastguard Worker };
728*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take a global_average_pooling_nwc_context; both
// variants take only a batch index. Declared for C translation units only,
// presumably because the `context[restrict ...]` array-parameter declarator is
// C99 syntax that C++ does not accept.
729*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
730*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_unipass(
731*4bdc9457SAndroid Build Coastguard Worker       const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)],
732*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index);
733*4bdc9457SAndroid Build Coastguard Worker 
734*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_multipass(
735*4bdc9457SAndroid Build Coastguard Worker       const struct global_average_pooling_nwc_context context[restrict XNN_MIN_ELEMENTS(1)],
736*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index);
737*4bdc9457SAndroid Build Coastguard Worker #endif
738*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_global_average_pooling_ncw (it is that
// function's `context` parameter type). Unlike the NWC variant it has a single
// `ukernel` pointer and per-channel input/output strides.
// NOTE(review): units of the *_stride fields and of input_elements are not
// stated here -- confirm against the implementation.
739*4bdc9457SAndroid Build Coastguard Worker struct global_average_pooling_ncw_context {
740*4bdc9457SAndroid Build Coastguard Worker   size_t input_elements;
741*4bdc9457SAndroid Build Coastguard Worker   const void* input;
742*4bdc9457SAndroid Build Coastguard Worker   size_t input_channel_stride;
743*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
744*4bdc9457SAndroid Build Coastguard Worker   void* output;
745*4bdc9457SAndroid Build Coastguard Worker   size_t output_channel_stride;
746*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
747*4bdc9457SAndroid Build Coastguard Worker   xnn_gavgpool_cw_ukernel_function ukernel;
  // Micro-kernel parameters; only an f32 variant is declared.
748*4bdc9457SAndroid Build Coastguard Worker   union {
749*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_gavgpool_params f32;
750*4bdc9457SAndroid Build Coastguard Worker   } params;
751*4bdc9457SAndroid Build Coastguard Worker };
752*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry point that takes a global_average_pooling_ncw_context.
// Declared for C translation units only, presumably because the
// `context[restrict ...]` array-parameter declarator is C99 syntax that C++
// does not accept.
// NOTE(review): channels_start / channels_slice presumably select a range of
// channels within batch element `batch_index` -- confirm.
753*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
754*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_global_average_pooling_ncw(
755*4bdc9457SAndroid Build Coastguard Worker       const struct global_average_pooling_ncw_context context[restrict XNN_MIN_ELEMENTS(1)],
756*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
757*4bdc9457SAndroid Build Coastguard Worker       size_t channels_start,
758*4bdc9457SAndroid Build Coastguard Worker       size_t channels_slice);
759*4bdc9457SAndroid Build Coastguard Worker #endif
760*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_resize_bilinear (it is that function's
// `context` parameter type). Every field is described by the comment above it.
761*4bdc9457SAndroid Build Coastguard Worker struct resize_bilinear_context {
762*4bdc9457SAndroid Build Coastguard Worker   // Number of channels multiplied by sizeof(input element).
763*4bdc9457SAndroid Build Coastguard Worker   size_t scaled_channels;
764*4bdc9457SAndroid Build Coastguard Worker   // Indirection buffer with pointers related to rows of input pixels.
765*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
766*4bdc9457SAndroid Build Coastguard Worker   // Offset, in bytes, to be added to pointers in indirection buffer.
767*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
768*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between images of consecutive batches in the input.
769*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
770*4bdc9457SAndroid Build Coastguard Worker   // Packed pairs of (x, y) linear interpolation coefficients.
771*4bdc9457SAndroid Build Coastguard Worker   const void* packed_weights;
772*4bdc9457SAndroid Build Coastguard Worker   // Pointer to the output tensor.
773*4bdc9457SAndroid Build Coastguard Worker   void* output;
774*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between adjacent pixels in the output.
775*4bdc9457SAndroid Build Coastguard Worker   size_t output_pixel_stride;
776*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between images of consecutive batches in the output.
777*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
778*4bdc9457SAndroid Build Coastguard Worker   // log2(sizeof(weight element)).
779*4bdc9457SAndroid Build Coastguard Worker   uint32_t log2_wsize;
780*4bdc9457SAndroid Build Coastguard Worker   // Pointer to BILINEAR micro-kernel function.
781*4bdc9457SAndroid Build Coastguard Worker   xnn_ibilinear_ukernel_function ukernel;
782*4bdc9457SAndroid Build Coastguard Worker };
783*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for xnn_compute_resize_bilinear_chw (it is that function's
// `context` parameter type). Compared with resize_bilinear_context it adds
// output_pixels and per-channel input/output strides, and drops
// output_pixel_stride and log2_wsize.
784*4bdc9457SAndroid Build Coastguard Worker struct resize_bilinear_chw_context {
785*4bdc9457SAndroid Build Coastguard Worker   // Number of pixels per output image plane.
786*4bdc9457SAndroid Build Coastguard Worker   size_t output_pixels;
  // NOTE(review): the comment below is word-for-word the one on
  // resize_bilinear_context::scaled_channels, but this field is named plain
  // `channels` -- confirm that it really is pre-multiplied by the element size.
787*4bdc9457SAndroid Build Coastguard Worker   // Number of channels multiplied by sizeof(input element).
788*4bdc9457SAndroid Build Coastguard Worker   size_t channels;
789*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between adjacent channels in the input.
790*4bdc9457SAndroid Build Coastguard Worker   size_t input_channel_stride;
791*4bdc9457SAndroid Build Coastguard Worker   // Indirection buffer with pointers related to rows of input pixels.
792*4bdc9457SAndroid Build Coastguard Worker   const void** indirect_input;
793*4bdc9457SAndroid Build Coastguard Worker   // Offset, in bytes, to be added to pointers in indirection buffer.
794*4bdc9457SAndroid Build Coastguard Worker   size_t input_offset;
795*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between images of consecutive batches in the input.
796*4bdc9457SAndroid Build Coastguard Worker   size_t input_batch_stride;
797*4bdc9457SAndroid Build Coastguard Worker   // Packed pairs of (x, y) linear interpolation coefficients.
798*4bdc9457SAndroid Build Coastguard Worker   const void* packed_weights;
799*4bdc9457SAndroid Build Coastguard Worker   // Pointer to the output tensor.
800*4bdc9457SAndroid Build Coastguard Worker   void* output;
801*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between images of consecutive batches in the output.
802*4bdc9457SAndroid Build Coastguard Worker   size_t output_batch_stride;
803*4bdc9457SAndroid Build Coastguard Worker   // Stride, in bytes, between consecutive channels of an output image.
804*4bdc9457SAndroid Build Coastguard Worker   size_t output_channel_stride;
805*4bdc9457SAndroid Build Coastguard Worker   // Pointer to BILINEAR micro-kernel function.
806*4bdc9457SAndroid Build Coastguard Worker   xnn_ibilinear_chw_ukernel_function ukernel;
807*4bdc9457SAndroid Build Coastguard Worker };
808*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points for the two bilinear-resize contexts; both take a batch
// index plus a (start, range) pair. Declared for C translation units only,
// presumably because the `context[restrict ...]` array-parameter declarator is
// C99 syntax that C++ does not accept.
// NOTE(review): the CHW variant reuses the names pixel_start / pixel_range,
// while its context is organised per channel (channels, input_channel_stride,
// output_channel_stride). Confirm whether the implementation actually tiles
// over pixels or over channels, and rename here if it is the latter.
809*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
810*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_resize_bilinear(
811*4bdc9457SAndroid Build Coastguard Worker       const struct resize_bilinear_context context[restrict XNN_MIN_ELEMENTS(1)],
812*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
813*4bdc9457SAndroid Build Coastguard Worker       size_t pixel_start,
814*4bdc9457SAndroid Build Coastguard Worker       size_t pixel_range);
815*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_resize_bilinear_chw(
816*4bdc9457SAndroid Build Coastguard Worker     const struct resize_bilinear_chw_context context[restrict XNN_MIN_ELEMENTS(1)],
817*4bdc9457SAndroid Build Coastguard Worker     size_t batch_index,
818*4bdc9457SAndroid Build Coastguard Worker     size_t pixel_start,
819*4bdc9457SAndroid Build Coastguard Worker     size_t pixel_range);
820*4bdc9457SAndroid Build Coastguard Worker #endif
821*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle for the xnn_compute_elementwise_binary_{1..5}d functions (it
// is their `context` parameter type): two const operand pointers (a, b), one
// writable result pointer (y), and XNN_MAX_TENSOR_DIMS - 1 stride entries for
// each of them.
// NOTE(review): the strides are presumably in bytes and `elements` presumably
// the extent of the innermost, stride-less dimension -- confirm.
822*4bdc9457SAndroid Build Coastguard Worker struct elementwise_binary_context {
823*4bdc9457SAndroid Build Coastguard Worker   const void* a;
824*4bdc9457SAndroid Build Coastguard Worker   size_t a_stride[XNN_MAX_TENSOR_DIMS - 1];
825*4bdc9457SAndroid Build Coastguard Worker   const void* b;
826*4bdc9457SAndroid Build Coastguard Worker   size_t b_stride[XNN_MAX_TENSOR_DIMS - 1];
827*4bdc9457SAndroid Build Coastguard Worker   void* y;
828*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride[XNN_MAX_TENSOR_DIMS - 1];
829*4bdc9457SAndroid Build Coastguard Worker   size_t elements;
  // Micro-kernel parameters. This is a union, so only one member is stored at
  // a time; the quantized types have separate add/sub and mul variants.
830*4bdc9457SAndroid Build Coastguard Worker   union {
831*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_add_minmax_params qs8_addsub;
832*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_add_minmax_params qu8_addsub;
833*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_mul_minmax_params qs8_mul;
834*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_mul_minmax_params qu8_mul;
835*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16;
836*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
837*4bdc9457SAndroid Build Coastguard Worker   } params;
838*4bdc9457SAndroid Build Coastguard Worker   xnn_vbinary_ukernel_function ukernel;
839*4bdc9457SAndroid Build Coastguard Worker };
840*4bdc9457SAndroid Build Coastguard Worker 
// Compute entry points that take an elementwise_binary_context, one per number
// of index arguments (1 to 5: i, j, k, l, m). Declared for C translation units
// only, presumably because the `context[restrict ...]` array-parameter
// declarator is C99 syntax that C++ does not accept.
// NOTE(review): each index presumably selects a position along one outer
// dimension and is combined with the matching a_stride/b_stride/y_stride entry
// -- confirm against the implementation.
841*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
842*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_elementwise_binary_1d(
843*4bdc9457SAndroid Build Coastguard Worker       const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)],
844*4bdc9457SAndroid Build Coastguard Worker       size_t i);
845*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_elementwise_binary_2d(
846*4bdc9457SAndroid Build Coastguard Worker       const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)],
847*4bdc9457SAndroid Build Coastguard Worker       size_t i, size_t j);
848*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_elementwise_binary_3d(
849*4bdc9457SAndroid Build Coastguard Worker       const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)],
850*4bdc9457SAndroid Build Coastguard Worker       size_t i, size_t j, size_t k);
851*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_elementwise_binary_4d(
852*4bdc9457SAndroid Build Coastguard Worker       const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)],
853*4bdc9457SAndroid Build Coastguard Worker       size_t i, size_t j, size_t k, size_t l);
854*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_elementwise_binary_5d(
855*4bdc9457SAndroid Build Coastguard Worker       const struct elementwise_binary_context context[restrict XNN_MIN_ELEMENTS(1)],
856*4bdc9457SAndroid Build Coastguard Worker       size_t i, size_t j, size_t k, size_t l, size_t m);
857*4bdc9457SAndroid Build Coastguard Worker #endif
858*4bdc9457SAndroid Build Coastguard Worker 
// Argument bundle shared by xnn_compute_channel_shuffle_fixed and
// xnn_compute_channel_shuffle_variable (it is their `context` parameter type):
// a const source pointer x and a writable destination pointer y, each with a
// stride, plus two sizes n and m.
// NOTE(review): the meaning of n and m is not stated here -- confirm against
// the implementation.
859*4bdc9457SAndroid Build Coastguard Worker struct channel_shuffle_context {
860*4bdc9457SAndroid Build Coastguard Worker   const void* x;
861*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
862*4bdc9457SAndroid Build Coastguard Worker   void* y;
863*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
864*4bdc9457SAndroid Build Coastguard Worker   size_t n;
865*4bdc9457SAndroid Build Coastguard Worker   size_t m;
  // Anonymous union: the two function pointers share storage, and this struct
  // has no field recording which one is set. Presumably the context must be
  // paired with the matching _fixed / _variable compute function.
866*4bdc9457SAndroid Build Coastguard Worker   union {
867*4bdc9457SAndroid Build Coastguard Worker     xnn_zipc_ukernel_function fixed_ukernel;
868*4bdc9457SAndroid Build Coastguard Worker     xnn_zipv_ukernel_function variable_ukernel;
869*4bdc9457SAndroid Build Coastguard Worker   };
870*4bdc9457SAndroid Build Coastguard Worker };
871*4bdc9457SAndroid Build Coastguard Worker 
// Channel-shuffle compute entry points. Both take a read-only
// channel_shuffle_context and a single size_t index.
// The declarations are hidden from C++ translation units; the
// `context[restrict ...]` array-parameter form is C-only declarator syntax.
872*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
873*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_channel_shuffle_fixed(
874*4bdc9457SAndroid Build Coastguard Worker       const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)],
875*4bdc9457SAndroid Build Coastguard Worker       size_t index);
876*4bdc9457SAndroid Build Coastguard Worker 
877*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_channel_shuffle_variable(
878*4bdc9457SAndroid Build Coastguard Worker       const struct channel_shuffle_context context[restrict XNN_MIN_ELEMENTS(1)],
879*4bdc9457SAndroid Build Coastguard Worker       size_t index);
880*4bdc9457SAndroid Build Coastguard Worker #endif
881*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_lut_strided(), which is called with a
// batch index.
// NOTE(review): `t` is presumably the lookup table and x/y the strided
// input/output buffers -- inferred from names; confirm in the implementation.
882*4bdc9457SAndroid Build Coastguard Worker struct lut_strided_context {
883*4bdc9457SAndroid Build Coastguard Worker   size_t n;
884*4bdc9457SAndroid Build Coastguard Worker   const void* x;
885*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
886*4bdc9457SAndroid Build Coastguard Worker   const void* t;
887*4bdc9457SAndroid Build Coastguard Worker   void* y;
888*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
889*4bdc9457SAndroid Build Coastguard Worker   xnn_x8_lut_ukernel_function ukernel;  // micro-kernel function pointer
890*4bdc9457SAndroid Build Coastguard Worker };
891*4bdc9457SAndroid Build Coastguard Worker 
// Strided LUT compute entry point: takes the read-only lut_strided_context
// and one batch index. C-only declaration (hidden from C++).
892*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
893*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_lut_strided(
894*4bdc9457SAndroid Build Coastguard Worker       const struct lut_strided_context context[restrict XNN_MIN_ELEMENTS(1)],
895*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index);
896*4bdc9457SAndroid Build Coastguard Worker #endif
897*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_lut_contiguous(), which is called with an
// (offset, size) pair. Carries the same fields as lut_strided_context except
// that it has no `n` member.
// NOTE(review): `t` is presumably the lookup table -- confirm.
898*4bdc9457SAndroid Build Coastguard Worker struct lut_contiguous_context {
899*4bdc9457SAndroid Build Coastguard Worker   const void* x;
900*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
901*4bdc9457SAndroid Build Coastguard Worker   const void* t;
902*4bdc9457SAndroid Build Coastguard Worker   void* y;
903*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
904*4bdc9457SAndroid Build Coastguard Worker   xnn_x8_lut_ukernel_function ukernel;  // micro-kernel function pointer
905*4bdc9457SAndroid Build Coastguard Worker };
906*4bdc9457SAndroid Build Coastguard Worker 
// Contiguous LUT compute entry point: takes the read-only
// lut_contiguous_context plus an offset and a size.
// NOTE(review): units of offset/size (bytes vs. elements) are not visible
// here -- confirm in the implementation. C-only declaration.
907*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
908*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_lut_contiguous(
909*4bdc9457SAndroid Build Coastguard Worker       const struct lut_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)],
910*4bdc9457SAndroid Build Coastguard Worker       size_t offset,
911*4bdc9457SAndroid Build Coastguard Worker       size_t size);
912*4bdc9457SAndroid Build Coastguard Worker #endif
913*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_univector_strided(): input/output pointers
// with their strides, a unary micro-kernel pointer, and that kernel's
// parameters.
// NOTE(review): `n` is presumably the per-row size handed to the ukernel --
// inferred from the name; confirm in the implementation.
914*4bdc9457SAndroid Build Coastguard Worker struct univector_strided_context {
915*4bdc9457SAndroid Build Coastguard Worker   size_t n;
916*4bdc9457SAndroid Build Coastguard Worker   const void* x;
917*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
918*4bdc9457SAndroid Build Coastguard Worker   void* y;
919*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
920*4bdc9457SAndroid Build Coastguard Worker   xnn_vunary_ukernel_function ukernel;
// One parameter block per supported unary operator/datatype. Being a union,
// all members share storage and only one is meaningful for a given ukernel.
// univector_contiguous_context.params lists the same members; keep the two
// in sync when adding an operator.
921*4bdc9457SAndroid Build Coastguard Worker   union {
922*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_abs_params f16_abs;
923*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_default_params f16_default;
924*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_f32_cvt_params f16_f32_cvt;
925*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_hswish_params f16_hswish;
926*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_lrelu_params f16_lrelu;
927*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16_minmax;
928*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_neg_params f16_neg;
929*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_sigmoid_params f16_sigmoid;
930*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_abs_params f32_abs;
931*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_default_params f32_default;
932*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_elu_params f32_elu;
933*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_f16_cvt_params f32_f16_cvt;
934*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_hswish_params f32_hswish;
935*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_lrelu_params f32_lrelu;
936*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32_minmax;
937*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_neg_params f32_neg;
938*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_qs8_cvt_params f32_qs8_cvt;
939*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_qu8_cvt_params f32_qu8_cvt;
940*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_rnd_params f32_rnd;
941*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_sigmoid_params f32_sigmoid;
942*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_sqrt_params f32_sqrt;
943*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_cvt_params qs8_cvt;
944*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_f32_cvt_params qs8_f32_cvt;
945*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_lrelu_params qs8_lrelu;
946*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_cvt_params qu8_cvt;
947*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_f32_cvt_params qu8_f32_cvt;
948*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_lrelu_params qu8_lrelu;
949*4bdc9457SAndroid Build Coastguard Worker     union xnn_s8_minmax_params s8_minmax;
950*4bdc9457SAndroid Build Coastguard Worker     union xnn_u8_minmax_params u8_minmax;
951*4bdc9457SAndroid Build Coastguard Worker   } params;
952*4bdc9457SAndroid Build Coastguard Worker };
953*4bdc9457SAndroid Build Coastguard Worker 
// Strided unary-operator compute entry point: takes the read-only
// univector_strided_context plus a starting batch index and a batch range.
// NOTE(review): batch_range is presumably the number of consecutive batch
// rows handled per call -- confirm. C-only declaration (hidden from C++).
954*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
955*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_univector_strided(
956*4bdc9457SAndroid Build Coastguard Worker       const struct univector_strided_context context[restrict XNN_MIN_ELEMENTS(1)],
957*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index,
958*4bdc9457SAndroid Build Coastguard Worker       size_t batch_range);
959*4bdc9457SAndroid Build Coastguard Worker #endif
960*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_univector_contiguous(): input/output
// pointers, two 16-bit log2 size fields, a unary micro-kernel pointer, and
// that kernel's parameters. Unlike univector_strided_context it has no
// stride or `n` members.
// NOTE(review): log2_xsize/log2_ysize are presumably log2 of the input and
// output element sizes in bytes -- inferred from names; confirm.
961*4bdc9457SAndroid Build Coastguard Worker struct univector_contiguous_context {
962*4bdc9457SAndroid Build Coastguard Worker   const void* x;
963*4bdc9457SAndroid Build Coastguard Worker   void* y;
964*4bdc9457SAndroid Build Coastguard Worker   uint16_t log2_xsize;
965*4bdc9457SAndroid Build Coastguard Worker   uint16_t log2_ysize;
966*4bdc9457SAndroid Build Coastguard Worker   xnn_vunary_ukernel_function ukernel;
// One parameter block per supported unary operator/datatype. Being a union,
// all members share storage and only one is meaningful for a given ukernel.
// univector_strided_context.params lists the same members; keep the two in
// sync when adding an operator.
967*4bdc9457SAndroid Build Coastguard Worker   union {
968*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_abs_params f16_abs;
969*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_default_params f16_default;
970*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_f32_cvt_params f16_f32_cvt;
971*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_hswish_params f16_hswish;
972*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_lrelu_params f16_lrelu;
973*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16_minmax;
974*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_neg_params f16_neg;
975*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_sigmoid_params f16_sigmoid;
976*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_abs_params f32_abs;
977*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_default_params f32_default;
978*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_elu_params f32_elu;
979*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_f16_cvt_params f32_f16_cvt;
980*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_hswish_params f32_hswish;
981*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_lrelu_params f32_lrelu;
982*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32_minmax;
983*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_neg_params f32_neg;
984*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_qs8_cvt_params f32_qs8_cvt;
985*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_qu8_cvt_params f32_qu8_cvt;
986*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_rnd_params f32_rnd;
987*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_sigmoid_params f32_sigmoid;
988*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_sqrt_params f32_sqrt;
989*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_cvt_params qs8_cvt;
990*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_f32_cvt_params qs8_f32_cvt;
991*4bdc9457SAndroid Build Coastguard Worker     union xnn_qs8_lrelu_params qs8_lrelu;
992*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_cvt_params qu8_cvt;
993*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_f32_cvt_params qu8_f32_cvt;
994*4bdc9457SAndroid Build Coastguard Worker     union xnn_qu8_lrelu_params qu8_lrelu;
995*4bdc9457SAndroid Build Coastguard Worker     union xnn_s8_minmax_params s8_minmax;
996*4bdc9457SAndroid Build Coastguard Worker     union xnn_u8_minmax_params u8_minmax;
997*4bdc9457SAndroid Build Coastguard Worker   } params;
998*4bdc9457SAndroid Build Coastguard Worker };
999*4bdc9457SAndroid Build Coastguard Worker 
// Contiguous unary-operator compute entry point: takes the read-only
// univector_contiguous_context plus an offset and a size.
// NOTE(review): units of offset/size are not visible here -- confirm in the
// implementation. C-only declaration (hidden from C++).
1000*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1001*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_univector_contiguous(
1002*4bdc9457SAndroid Build Coastguard Worker       const struct univector_contiguous_context context[restrict XNN_MIN_ELEMENTS(1)],
1003*4bdc9457SAndroid Build Coastguard Worker       size_t offset,
1004*4bdc9457SAndroid Build Coastguard Worker       size_t size);
1005*4bdc9457SAndroid Build Coastguard Worker #endif
1006*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_prelu(): read-only input `x` and `w`
// pointers, writable output `y`, strides for x and y, and the PReLU
// micro-kernel pointer.
// NOTE(review): `w` is presumably the per-channel slope/weight array and `n`
// the row size -- inferred from names; confirm in the implementation.
1007*4bdc9457SAndroid Build Coastguard Worker struct prelu_context {
1008*4bdc9457SAndroid Build Coastguard Worker   size_t n;
1009*4bdc9457SAndroid Build Coastguard Worker   const void* x;
1010*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
1011*4bdc9457SAndroid Build Coastguard Worker   const void* w;
1012*4bdc9457SAndroid Build Coastguard Worker   void* y;
1013*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
1014*4bdc9457SAndroid Build Coastguard Worker   xnn_prelu_ukernel_function ukernel;
1015*4bdc9457SAndroid Build Coastguard Worker };
1016*4bdc9457SAndroid Build Coastguard Worker 
// PReLU compute entry point: takes the read-only prelu_context plus a batch
// start and a batch range.
// NOTE(review): batch_range is presumably the count of batch rows processed
// starting at batch_start -- confirm. C-only declaration (hidden from C++).
1017*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1018*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_prelu(
1019*4bdc9457SAndroid Build Coastguard Worker       const struct prelu_context context[restrict XNN_MIN_ELEMENTS(1)],
1020*4bdc9457SAndroid Build Coastguard Worker       size_t batch_start,
1021*4bdc9457SAndroid Build Coastguard Worker       size_t batch_range);
1022*4bdc9457SAndroid Build Coastguard Worker #endif
1023*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_vmulcaddc(): read-only `x` and `w`
// pointers, writable `y`, strides for x and y, the micro-kernel pointer, and
// min/max parameters.
// NOTE(review): `w` presumably holds the per-channel multiply/add constants
// -- inferred from the kernel name; confirm in the implementation.
1024*4bdc9457SAndroid Build Coastguard Worker struct vmulcaddc_context {
1025*4bdc9457SAndroid Build Coastguard Worker   size_t n;
1026*4bdc9457SAndroid Build Coastguard Worker   const void* x;
1027*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
1028*4bdc9457SAndroid Build Coastguard Worker   const void* w;
1029*4bdc9457SAndroid Build Coastguard Worker   void* y;
1030*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
1031*4bdc9457SAndroid Build Coastguard Worker   xnn_vmulcaddc_ukernel_function ukernel;
// f16 and f32 min/max parameters share storage; only one is meaningful.
1032*4bdc9457SAndroid Build Coastguard Worker   union {
1033*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16;
1034*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
1035*4bdc9457SAndroid Build Coastguard Worker   } params;
1036*4bdc9457SAndroid Build Coastguard Worker };
1037*4bdc9457SAndroid Build Coastguard Worker 
// vmulcaddc compute entry point: takes the read-only vmulcaddc_context plus
// a batch start and a batch size. C-only declaration (hidden from C++).
1038*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1039*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_vmulcaddc(
1040*4bdc9457SAndroid Build Coastguard Worker       const struct vmulcaddc_context context[restrict XNN_MIN_ELEMENTS(1)],
1041*4bdc9457SAndroid Build Coastguard Worker       size_t batch_start,
1042*4bdc9457SAndroid Build Coastguard Worker       size_t batch_size);
1043*4bdc9457SAndroid Build Coastguard Worker #endif
1044*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_pad_5d(): input/output pointers with
// per-dimension strides, padding amounts, sizes, the 32-bit padding value,
// and the pad and fill micro-kernel pointers.
//
// Array extents differ on purpose or by accident -- read carefully:
//   - input_stride / output_stride have XNN_MAX_TENSOR_DIMS - 1 entries;
//   - pre_paddings / input_size have XNN_MAX_TENSOR_DIMS entries;
//   - post_paddings / output_size have exactly 1 entry.
// NOTE(review): presumably strides are omitted for one dimension and only a
// single dimension needs post-padding/output size -- confirm against
// the xnn_compute_pad_5d implementation before indexing these arrays.
1045*4bdc9457SAndroid Build Coastguard Worker struct pad_context {
1046*4bdc9457SAndroid Build Coastguard Worker   const void* input;
1047*4bdc9457SAndroid Build Coastguard Worker   size_t input_stride[XNN_MAX_TENSOR_DIMS - 1];
1048*4bdc9457SAndroid Build Coastguard Worker   void* output;
1049*4bdc9457SAndroid Build Coastguard Worker   size_t output_stride[XNN_MAX_TENSOR_DIMS - 1];
1050*4bdc9457SAndroid Build Coastguard Worker   size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
1051*4bdc9457SAndroid Build Coastguard Worker   size_t post_paddings[1];
1052*4bdc9457SAndroid Build Coastguard Worker   size_t input_size[XNN_MAX_TENSOR_DIMS];
1053*4bdc9457SAndroid Build Coastguard Worker   size_t output_size[1];
1054*4bdc9457SAndroid Build Coastguard Worker   uint32_t padding_value;
1055*4bdc9457SAndroid Build Coastguard Worker   xnn_pad_ukernel_function pad_ukernel;
1056*4bdc9457SAndroid Build Coastguard Worker   xnn_fill_ukernel_function fill_ukernel;
1057*4bdc9457SAndroid Build Coastguard Worker };
1058*4bdc9457SAndroid Build Coastguard Worker 
// Padding compute entry point: takes the read-only pad_context plus five
// size_t indices (i, j, k, l, m). C-only declaration (hidden from C++).
1059*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1060*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_pad_5d(
1061*4bdc9457SAndroid Build Coastguard Worker       const struct pad_context context[restrict XNN_MIN_ELEMENTS(1)],
1062*4bdc9457SAndroid Build Coastguard Worker       size_t i, size_t j, size_t k, size_t l, size_t m);
1063*4bdc9457SAndroid Build Coastguard Worker #endif
1064*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_u8_softmax(). Unlike the void-pointer
// contexts in this header, the buffers are typed: uint8_t input/output and a
// read-only uint32_t table `t`. Two micro-kernel pointers are carried.
// NOTE(review): presumably rmax_ukernel finds the row maximum and
// lut_norm_ukernel applies the table lookup + normalization -- inferred from
// the type names; confirm in the implementation.
1065*4bdc9457SAndroid Build Coastguard Worker struct u8_softmax_context {
1066*4bdc9457SAndroid Build Coastguard Worker   size_t n;
1067*4bdc9457SAndroid Build Coastguard Worker   const uint8_t* x;
1068*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
1069*4bdc9457SAndroid Build Coastguard Worker   const uint32_t* t;
1070*4bdc9457SAndroid Build Coastguard Worker   uint8_t* y;
1071*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
1072*4bdc9457SAndroid Build Coastguard Worker   xnn_u8_rmax_ukernel_function rmax_ukernel;
1073*4bdc9457SAndroid Build Coastguard Worker   xnn_u8_lut32norm_ukernel_function lut_norm_ukernel;
1074*4bdc9457SAndroid Build Coastguard Worker };
1075*4bdc9457SAndroid Build Coastguard Worker 
// U8 softmax compute entry point: takes the read-only u8_softmax_context and
// one batch index. C-only declaration (hidden from C++).
1076*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1077*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_u8_softmax(
1078*4bdc9457SAndroid Build Coastguard Worker       const struct u8_softmax_context context[restrict XNN_MIN_ELEMENTS(1)],
1079*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index);
1080*4bdc9457SAndroid Build Coastguard Worker #endif
1081*4bdc9457SAndroid Build Coastguard Worker 
// Function-pointer type for a type-erased reciprocal helper: reads from
// `input` and writes through `output`, returning nothing. Used as the
// `compute_reciprocal` member of floating_point_softmax_context.
// NOTE(review): presumably operates on a single scalar whose width depends
// on the softmax datatype (f16/f32) -- confirm at the call site.
1082*4bdc9457SAndroid Build Coastguard Worker typedef void (*xnn_compute_reciprocal_function)(const void* input, void* output);
1083*4bdc9457SAndroid Build Coastguard Worker 
// Context consumed by xnn_compute_floating_point_softmax(): type-erased
// input/output pointers with strides, four function pointers, and two
// parameter unions, each with an f16 and an f32 variant.
// NOTE(review): the member order (rmax, raddstoreexpminusmax,
// compute_reciprocal, vmulc) presumably mirrors the order the stages run in
// -- inferred from names; confirm in the implementation.
1084*4bdc9457SAndroid Build Coastguard Worker struct floating_point_softmax_context {
1085*4bdc9457SAndroid Build Coastguard Worker   size_t n;
1086*4bdc9457SAndroid Build Coastguard Worker   const void* x;
1087*4bdc9457SAndroid Build Coastguard Worker   size_t x_stride;
1088*4bdc9457SAndroid Build Coastguard Worker   void* y;
1089*4bdc9457SAndroid Build Coastguard Worker   size_t y_stride;
1090*4bdc9457SAndroid Build Coastguard Worker   xnn_rmax_ukernel_function rmax_ukernel;
1091*4bdc9457SAndroid Build Coastguard Worker   xnn_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax_ukernel;
1092*4bdc9457SAndroid Build Coastguard Worker   xnn_compute_reciprocal_function compute_reciprocal;
1093*4bdc9457SAndroid Build Coastguard Worker   xnn_vbinary_ukernel_function vmulc_ukernel;
// Min/max parameters; f16 and f32 variants share storage.
1094*4bdc9457SAndroid Build Coastguard Worker   union {
1095*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_minmax_params f16;
1096*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_minmax_params f32;
1097*4bdc9457SAndroid Build Coastguard Worker   } minmax_params;
// Exp-minus parameters; f16 and f32 variants share storage.
1098*4bdc9457SAndroid Build Coastguard Worker   union {
1099*4bdc9457SAndroid Build Coastguard Worker     union xnn_f16_expminus_params f16;
1100*4bdc9457SAndroid Build Coastguard Worker     union xnn_f32_expminus_params f32;
1101*4bdc9457SAndroid Build Coastguard Worker   } expminus_params;
1102*4bdc9457SAndroid Build Coastguard Worker };
1103*4bdc9457SAndroid Build Coastguard Worker 
// Floating-point softmax compute entry point: takes the read-only
// floating_point_softmax_context and one batch index. C-only declaration
// (hidden from C++).
1104*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
1105*4bdc9457SAndroid Build Coastguard Worker   XNN_PRIVATE void xnn_compute_floating_point_softmax(
1106*4bdc9457SAndroid Build Coastguard Worker       const struct floating_point_softmax_context context[restrict XNN_MIN_ELEMENTS(1)],
1107*4bdc9457SAndroid Build Coastguard Worker       size_t batch_index);
1108*4bdc9457SAndroid Build Coastguard Worker #endif
1109