xref: /aosp_15_r20/external/XNNPACK/src/init.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker // Copyright (c) Facebook, Inc. and its affiliates.
2*4bdc9457SAndroid Build Coastguard Worker // All rights reserved.
3*4bdc9457SAndroid Build Coastguard Worker //
4*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC
5*4bdc9457SAndroid Build Coastguard Worker //
6*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
7*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker 
9*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
10*4bdc9457SAndroid Build Coastguard Worker #include <stdbool.h>
11*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
12*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
13*4bdc9457SAndroid Build Coastguard Worker #include <string.h>
14*4bdc9457SAndroid Build Coastguard Worker 
15*4bdc9457SAndroid Build Coastguard Worker #ifdef _WIN32
16*4bdc9457SAndroid Build Coastguard Worker   #include <windows.h>
17*4bdc9457SAndroid Build Coastguard Worker #else
18*4bdc9457SAndroid Build Coastguard Worker   #include <errno.h>
19*4bdc9457SAndroid Build Coastguard Worker   #include <pthread.h>
20*4bdc9457SAndroid Build Coastguard Worker   #include <sys/mman.h>
21*4bdc9457SAndroid Build Coastguard Worker   #include <unistd.h>
22*4bdc9457SAndroid Build Coastguard Worker #endif
23*4bdc9457SAndroid Build Coastguard Worker 
24*4bdc9457SAndroid Build Coastguard Worker #ifdef _MSC_VER
25*4bdc9457SAndroid Build Coastguard Worker   #include <intrin.h>
26*4bdc9457SAndroid Build Coastguard Worker #endif
27*4bdc9457SAndroid Build Coastguard Worker 
28*4bdc9457SAndroid Build Coastguard Worker #ifndef __EMSCRIPTEN__
29*4bdc9457SAndroid Build Coastguard Worker   #include <cpuinfo.h>
30*4bdc9457SAndroid Build Coastguard Worker #endif
31*4bdc9457SAndroid Build Coastguard Worker 
32*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
33*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/allocator.h>
34*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/argmaxpool.h>
35*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/avgpool.h>
36*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
37*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/conv.h>
38*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/dwconv.h>
39*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/gavgpool.h>
40*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/gemm.h>
41*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/fill.h>
42*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/ibilinear.h>
43*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/igemm.h>
44*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/log.h>
45*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/lut.h>
46*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/maxpool.h>
47*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/pad.h>
48*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/params.h>
49*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
50*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/pavgpool.h>
51*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/prelu.h>
52*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/raddstoreexpminusmax.h>
53*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/rmax.h>
54*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/spmm.h>
55*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/unpool.h>
56*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/transpose.h>
57*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vadd.h>
58*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vbinary.h>
59*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vcvt.h>
60*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vlrelu.h>
61*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vmul.h>
62*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vmulcaddc.h>
63*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vunary.h>
64*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/zip.h>
65*4bdc9457SAndroid Build Coastguard Worker 
// Default for XNN_ENABLE_ASSEMBLY: if the build has not defined it before this
// point, it is set to 1. Code further down in this file uses it in #if to pick
// between assembly micro-kernels and the alternative kernel selection.
66*4bdc9457SAndroid Build Coastguard Worker #ifndef XNN_ENABLE_ASSEMBLY
67*4bdc9457SAndroid Build Coastguard Worker   #define XNN_ENABLE_ASSEMBLY 1
68*4bdc9457SAndroid Build Coastguard Worker #endif
69*4bdc9457SAndroid Build Coastguard Worker 
// One-time initialization guard, statically initialized. Its type follows the
// platform threading API: INIT_ONCE on Windows, pthread_once_t otherwise.
// NOTE(review): the call site that consumes this guard is outside this chunk;
// presumably it is what serializes the call to init() - confirm.
70*4bdc9457SAndroid Build Coastguard Worker #if XNN_PLATFORM_WINDOWS
71*4bdc9457SAndroid Build Coastguard Worker   static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
72*4bdc9457SAndroid Build Coastguard Worker #else
73*4bdc9457SAndroid Build Coastguard Worker   static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
74*4bdc9457SAndroid Build Coastguard Worker #endif
75*4bdc9457SAndroid Build Coastguard Worker 
// Converts a micro-kernel row count MR (1-based) into the zero-based slot used
// to index the per-MR gemm/igemm kernel tables, e.g. MR == 4 -> slot 3.
// The parameter is parenthesized so that an argument containing operators of
// lower precedence than '-' (shifts, comparisons, ?:) is converted as a whole.
#define XNN_MR_TO_INDEX(MR) ((MR) - 1)
77*4bdc9457SAndroid Build Coastguard Worker 
// XNN_ENABLE_GEMM_M_SPECIALIZATION has no in-file default: if the build did
// not define it, compilation stops here with an explicit diagnostic.
78*4bdc9457SAndroid Build Coastguard Worker #ifndef XNN_ENABLE_GEMM_M_SPECIALIZATION
79*4bdc9457SAndroid Build Coastguard Worker #error "XNN_ENABLE_GEMM_M_SPECIALIZATION is not defined"
80*4bdc9457SAndroid Build Coastguard Worker #endif
81*4bdc9457SAndroid Build Coastguard Worker 
// File-local pointer to a read-only allocator description; starts out NULL.
// The pointer itself (not the pointee) is volatile-qualified.
// NOTE(review): the code that assigns and reads this pointer is outside this
// chunk. volatile alone does not order accesses between threads - confirm
// that every access is covered by init_guard or another synchronization.
82*4bdc9457SAndroid Build Coastguard Worker static const struct xnn_allocator* volatile init_allocator = NULL;
83*4bdc9457SAndroid Build Coastguard Worker 
init(void)84*4bdc9457SAndroid Build Coastguard Worker static void init(void) {
85*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
86*4bdc9457SAndroid Build Coastguard Worker   // Unlike most other architectures, on x86/x86-64 when floating-point instructions
87*4bdc9457SAndroid Build Coastguard Worker   // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
88*4bdc9457SAndroid Build Coastguard Worker   // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
89*4bdc9457SAndroid Build Coastguard Worker   // of two infinities (must produce NaN per IEEE 754 standard).
90*4bdc9457SAndroid Build Coastguard Worker   static const volatile float inf = INFINITY;
91*4bdc9457SAndroid Build Coastguard Worker   const bool is_wasm_x86 = signbit(inf - inf);
92*4bdc9457SAndroid Build Coastguard Worker #endif
93*4bdc9457SAndroid Build Coastguard Worker   uint32_t init_flags = XNN_INIT_FLAG_XNNPACK;
94*4bdc9457SAndroid Build Coastguard Worker 
95*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
96*4bdc9457SAndroid Build Coastguard Worker   #if XNN_PLATFORM_MOBILE
97*4bdc9457SAndroid Build Coastguard Worker     if (!cpuinfo_has_arm_neon()) {
98*4bdc9457SAndroid Build Coastguard Worker       xnn_log_error("XNNPACK initialization failed: NEON is not supported");
99*4bdc9457SAndroid Build Coastguard Worker       return;
100*4bdc9457SAndroid Build Coastguard Worker     }
101*4bdc9457SAndroid Build Coastguard Worker   #else
102*4bdc9457SAndroid Build Coastguard Worker     if (!cpuinfo_has_arm_v6()) {
103*4bdc9457SAndroid Build Coastguard Worker       xnn_log_error("XNNPACK initialization failed: ARMv6 instructions not supported");
104*4bdc9457SAndroid Build Coastguard Worker       return;
105*4bdc9457SAndroid Build Coastguard Worker     }
106*4bdc9457SAndroid Build Coastguard Worker 
107*4bdc9457SAndroid Build Coastguard Worker     if (!cpuinfo_has_arm_vfpv2() && !cpuinfo_has_arm_vfpv3()) {
108*4bdc9457SAndroid Build Coastguard Worker       xnn_log_error("XNNPACK initialization failed: VFP is not supported");
109*4bdc9457SAndroid Build Coastguard Worker       return;
110*4bdc9457SAndroid Build Coastguard Worker     }
111*4bdc9457SAndroid Build Coastguard Worker   #endif
112*4bdc9457SAndroid Build Coastguard Worker 
113*4bdc9457SAndroid Build Coastguard Worker   if (cpuinfo_has_arm_neon()) {
114*4bdc9457SAndroid Build Coastguard Worker     /**************************** QC8 AArch32 micro-kernels ****************************/
115*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QC8_OPERATORS
116*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QC8;
117*4bdc9457SAndroid Build Coastguard Worker 
118*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
119*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
120*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
121*4bdc9457SAndroid Build Coastguard Worker             switch (cpuinfo_get_uarch(0)->uarch) {
122*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
123*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55);
124*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55);
125*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot);
126*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot);
127*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
128*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.mr = 4;
129*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.nr = 8;
130*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.log2_kr = 2;
131*4bdc9457SAndroid Build Coastguard Worker                 break;
132*4bdc9457SAndroid Build Coastguard Worker               default:
133*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64);
134*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64);
135*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot);
136*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot);
137*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
138*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.mr = 4;
139*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.nr = 8;
140*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.log2_kr = 2;
141*4bdc9457SAndroid Build Coastguard Worker                 break;
142*4bdc9457SAndroid Build Coastguard Worker             }
143*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
144*4bdc9457SAndroid Build Coastguard Worker         } else {
145*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_uarch(0)->uarch) {
146*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a5:
147*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a7:
148*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_krait:
149*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_kryo:
150*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
151*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
152*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
153*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
154*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
155*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
156*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
157*4bdc9457SAndroid Build Coastguard Worker               break;
158*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a32:
159*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a35:
160*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35);
161*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35);
162*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
163*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
164*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
165*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
166*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
167*4bdc9457SAndroid Build Coastguard Worker               break;
168*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
169*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a57:
170*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53);
171*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53);
172*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35);
173*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35);
174*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
175*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
176*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
177*4bdc9457SAndroid Build Coastguard Worker               break;
178*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
179*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53);
180*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53);
181*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
182*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
183*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
184*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
185*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
186*4bdc9457SAndroid Build Coastguard Worker               break;
187*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a72:
188*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
189*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
190*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
191*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
192*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
193*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 2;
194*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
195*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.log2_kr = 1;
196*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.log2_sr = 2;
197*4bdc9457SAndroid Build Coastguard Worker               break;
198*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m1:
199*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m2:
200*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m3:
201*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64);
202*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64);
203*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35);
204*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35);
205*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
206*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
207*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
208*4bdc9457SAndroid Build Coastguard Worker               break;
209*4bdc9457SAndroid Build Coastguard Worker 
210*4bdc9457SAndroid Build Coastguard Worker             default:
211*4bdc9457SAndroid Build Coastguard Worker               if (cpuinfo_has_arm_neon_v8()) {
212*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64);
213*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64);
214*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
215*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35);
216*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
217*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.mr = 4;
218*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.nr = 8;
219*4bdc9457SAndroid Build Coastguard Worker               } else {
220*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
221*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
222*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
223*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
224*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
225*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.mr = 4;
226*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.nr = 8;
227*4bdc9457SAndroid Build Coastguard Worker               }
228*4bdc9457SAndroid Build Coastguard Worker               break;
229*4bdc9457SAndroid Build Coastguard Worker           }
230*4bdc9457SAndroid Build Coastguard Worker         }
231*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
232*4bdc9457SAndroid Build Coastguard Worker         {
233*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
234*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.qc8.gemm.mr;
235*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.qc8.gemm.nr;
236*4bdc9457SAndroid Build Coastguard Worker           const uint32_t log2_kr = xnn_params.qc8.gemm.log2_kr;
237*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
238*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
239*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
240*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
241*4bdc9457SAndroid Build Coastguard Worker               break;
242*4bdc9457SAndroid Build Coastguard Worker             }
243*4bdc9457SAndroid Build Coastguard Worker 
244*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
245*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
246*4bdc9457SAndroid Build Coastguard Worker                 #if XNN_ENABLE_ARM_DOTPROD
247*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 4 && nr == 8 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
248*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55;
249*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55;
250*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot;
251*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot;
252*4bdc9457SAndroid Build Coastguard Worker                   }
253*4bdc9457SAndroid Build Coastguard Worker                 #endif  // XNN_ENABLE_ARM_DOTPROD
254*4bdc9457SAndroid Build Coastguard Worker                 break;
255*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
256*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
257*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53;
258*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53;
259*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35;
260*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35;
261*4bdc9457SAndroid Build Coastguard Worker                 }
262*4bdc9457SAndroid Build Coastguard Worker                 break;
263*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
264*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
265*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53;
266*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53;
267*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35;
268*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35;
269*4bdc9457SAndroid Build Coastguard Worker                 }
270*4bdc9457SAndroid Build Coastguard Worker                 break;
271*4bdc9457SAndroid Build Coastguard Worker 
272*4bdc9457SAndroid Build Coastguard Worker               default:
273*4bdc9457SAndroid Build Coastguard Worker                 break;
274*4bdc9457SAndroid Build Coastguard Worker             }
275*4bdc9457SAndroid Build Coastguard Worker           }
276*4bdc9457SAndroid Build Coastguard Worker         }
277*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
278*4bdc9457SAndroid Build Coastguard Worker       #else  // XNN_ENABLE_ASSEMBLY
279*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
280*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
281*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot);
282*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot);
283*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot);
284*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot);
285*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
286*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.mr = 4;
287*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.nr = 8;
288*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.log2_kr = 2;
289*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
290*4bdc9457SAndroid Build Coastguard Worker         } else if (cpuinfo_has_arm_v8()) {
291*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
292*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
293*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
294*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
295*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
296*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.mr = 2;
297*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.nr = 8;
298*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_kr = 1;
299*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_sr = 2;
300*4bdc9457SAndroid Build Coastguard Worker         } else {
301*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal);
302*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal);
303*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal);
304*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal);
305*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
306*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.mr = 2;
307*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.nr = 8;
308*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_kr = 1;
309*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_sr = 2;
310*4bdc9457SAndroid Build Coastguard Worker         }
311*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
312*4bdc9457SAndroid Build Coastguard Worker 
313*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_v8()) {
314*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__aarch32_neonv8_mla8_cortex_a35;
315*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
316*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].channel_tile = 16;
317*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].primary_tile = 3;
318*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64;
319*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
320*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].channel_tile = 16;
321*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].primary_tile = 9;
322*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mla8_ld64;
323*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
324*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].channel_tile = 8;
325*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].primary_tile = 25;
326*4bdc9457SAndroid Build Coastguard Worker       } else {
327*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128;
328*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
329*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].channel_tile = 16;
330*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[0].primary_tile = 3;
331*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neon_mla8_ld64;
332*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
333*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].channel_tile = 16;
334*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[1].primary_tile = 9;
335*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mla8_ld64;
336*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neon_params;
337*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].channel_tile = 8;
338*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qc8.dwconv[2].primary_tile = 25;
339*4bdc9457SAndroid Build Coastguard Worker       }
340*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QC8_OPERATORS
341*4bdc9457SAndroid Build Coastguard Worker 
342*4bdc9457SAndroid Build Coastguard Worker     /**************************** QS8 AArch32 micro-kernels ****************************/
343*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QS8_OPERATORS
344*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QS8;
345*4bdc9457SAndroid Build Coastguard Worker 
346*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
347*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
348*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
349*4bdc9457SAndroid Build Coastguard Worker             switch (cpuinfo_get_uarch(0)->uarch) {
350*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
351*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55);
352*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55);
353*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot);
354*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot);
355*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
356*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.mr = 4;
357*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.nr = 8;
358*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.log2_kr = 2;
359*4bdc9457SAndroid Build Coastguard Worker                 break;
360*4bdc9457SAndroid Build Coastguard Worker               default:
361*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64);
362*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64);
363*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot);
364*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot);
365*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
366*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.mr = 4;
367*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.nr = 8;
368*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.log2_kr = 2;
369*4bdc9457SAndroid Build Coastguard Worker                 break;
370*4bdc9457SAndroid Build Coastguard Worker             }
371*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
372*4bdc9457SAndroid Build Coastguard Worker         } else {
373*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_uarch(0)->uarch) {
374*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a5:
375*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a7:
376*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_krait:
377*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_kryo:
378*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
379*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
380*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
381*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
382*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
383*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
384*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
385*4bdc9457SAndroid Build Coastguard Worker               break;
386*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a32:
387*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a35:
388*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7);
389*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7);
390*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
391*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
392*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
393*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
394*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
395*4bdc9457SAndroid Build Coastguard Worker               break;
396*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
397*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a57:
398*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53);
399*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53);
400*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
401*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
402*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
403*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
404*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
405*4bdc9457SAndroid Build Coastguard Worker               break;
406*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
407*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53);
408*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53);
409*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
410*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
411*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
412*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
413*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
414*4bdc9457SAndroid Build Coastguard Worker               break;
415*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a72:
416*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
417*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
418*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
419*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
420*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
421*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 2;
422*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
423*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.log2_kr = 1;
424*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.log2_sr = 2;
425*4bdc9457SAndroid Build Coastguard Worker               break;
426*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m1:
427*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m2:
428*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m3:
429*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64);
430*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64);
431*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
432*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
433*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
434*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
435*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
436*4bdc9457SAndroid Build Coastguard Worker               break;
437*4bdc9457SAndroid Build Coastguard Worker             default:
438*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
439*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
440*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
441*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
442*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
443*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
444*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
445*4bdc9457SAndroid Build Coastguard Worker               break;
446*4bdc9457SAndroid Build Coastguard Worker           }
447*4bdc9457SAndroid Build Coastguard Worker         }
448*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
449*4bdc9457SAndroid Build Coastguard Worker         {
450*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
451*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.qs8.gemm.mr;
452*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.qs8.gemm.nr;
453*4bdc9457SAndroid Build Coastguard Worker           const uint32_t log2_kr = xnn_params.qs8.gemm.log2_kr;
454*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
455*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
456*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
457*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
458*4bdc9457SAndroid Build Coastguard Worker               break;
459*4bdc9457SAndroid Build Coastguard Worker             }
460*4bdc9457SAndroid Build Coastguard Worker 
461*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
462*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
463*4bdc9457SAndroid Build Coastguard Worker                 #if XNN_ENABLE_ARM_DOTPROD
464*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 4 && nr == 8 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
465*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55;
466*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55;
467*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot;
468*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot;
469*4bdc9457SAndroid Build Coastguard Worker                   }
470*4bdc9457SAndroid Build Coastguard Worker                 #endif  // XNN_ENABLE_ARM_DOTPROD
471*4bdc9457SAndroid Build Coastguard Worker                 break;
472*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
473*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
474*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53;
475*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53;
476*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7;
477*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7;
478*4bdc9457SAndroid Build Coastguard Worker                 }
479*4bdc9457SAndroid Build Coastguard Worker                 break;
480*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
481*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
482*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53;
483*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53;
484*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7;
485*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7;
486*4bdc9457SAndroid Build Coastguard Worker                 }
487*4bdc9457SAndroid Build Coastguard Worker                 break;
488*4bdc9457SAndroid Build Coastguard Worker               default:
489*4bdc9457SAndroid Build Coastguard Worker                 break;
490*4bdc9457SAndroid Build Coastguard Worker             }
491*4bdc9457SAndroid Build Coastguard Worker           }
492*4bdc9457SAndroid Build Coastguard Worker         }
493*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
494*4bdc9457SAndroid Build Coastguard Worker       #else  // XNN_ENABLE_ASSEMBLY
495*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
496*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
497*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot);
498*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neondot);
499*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot);
500*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot);
501*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
502*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.mr = 4;
503*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.nr = 8;
504*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.log2_kr = 2;
505*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
506*4bdc9457SAndroid Build Coastguard Worker         } else {
507*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
508*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
509*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
510*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
511*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
512*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.mr = 2;
513*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.nr = 8;
514*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_kr = 1;
515*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_sr = 2;
516*4bdc9457SAndroid Build Coastguard Worker         }
517*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
518*4bdc9457SAndroid Build Coastguard Worker 
519*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64;
520*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
521*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 16;
522*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].primary_tile = 9;
523*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64;
524*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
525*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 8;
526*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].primary_tile = 25;
527*4bdc9457SAndroid Build Coastguard Worker 
528*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
529*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8,
530*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8,
531*4bdc9457SAndroid Build Coastguard Worker         .init.qs8 = xnn_init_qs8_avgpool_minmax_rndnu_neon_params,
532*4bdc9457SAndroid Build Coastguard Worker         .update.qs8 = xnn_update_qs8_avgpool_minmax_rndnu_neon_params,
533*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
534*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
535*4bdc9457SAndroid Build Coastguard Worker       };
536*4bdc9457SAndroid Build Coastguard Worker 
537*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
        // QS8 element-wise addition. `op` takes the vadd kernel; both `opc` and
        // `ropc` are bound to the same vaddc kernel.
        // element_tile matches the `_x16` suffix of the kernels.
538*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
539*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16,
540*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16,
541*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_neon_params,
542*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
543*4bdc9457SAndroid Build Coastguard Worker       };
544*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vmul = (struct vbinary_parameters) {
        // QS8 element-wise multiplication (`rndnu` kernels). As with vadd, `opc` and
        // `ropc` share one vmulc kernel and element_tile matches the `_x16` suffix.
545*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x16,
546*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
547*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
548*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_mul = xnn_init_qs8_mul_minmax_rndnu_neon_params,
549*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
550*4bdc9457SAndroid Build Coastguard Worker       };
551*4bdc9457SAndroid Build Coastguard Worker 
552*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
        // QS8 leaky ReLU: NEON `_x32` kernel, element_tile 32 to match.
553*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__neon_x32,
554*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_neon_params,
555*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
556*4bdc9457SAndroid Build Coastguard Worker       };
557*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QS8_OPERATORS
558*4bdc9457SAndroid Build Coastguard Worker 
559*4bdc9457SAndroid Build Coastguard Worker     /*************************** QU8 AArch32 micro-kernels ***************************/
560*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QU8_OPERATORS
561*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QU8;
562*4bdc9457SAndroid Build Coastguard Worker 
563*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
        // QU8 GEMM/IGEMM selection when assembly micro-kernels are enabled.
        // Every branch in this half registers a GEMM and an IGEMM kernel for MR=4 and
        // for MR=1, sets the params-init callback, and records mr = 4 / nr = 8.
        // Only the dot-product branch assigns log2_kr.
564*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
          // Taken only when the build enables dot-product kernels AND cpuinfo reports
          // the NEON dot-product extension at run time.
565*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
            // The inner #if keeps the `__neondot` symbols out of builds where
            // XNN_ENABLE_ARM_DOTPROD is 0 (the condition above is then constant-false).
566*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot);
567*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot);
568*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot);
569*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot);
570*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
571*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.mr = 4;
572*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.nr = 8;
            // log2_kr = 2 pairs with the `c4` in the 4x8c4 / 1x8c4 kernel names.
573*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.log2_kr = 2;
574*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
575*4bdc9457SAndroid Build Coastguard Worker         } else {
          // No dot-product kernels: choose by the micro-architecture cpuinfo reports at
          // index 0 (the comment in the XNN_MAX_UARCH_TYPES block below calls this
          // "the big core").
          // NOTE(review): cpuinfo_get_uarch(0) is dereferenced without a NULL check,
          // unlike the loop below — presumably index 0 always exists once cpuinfo is
          // initialized; confirm.
576*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_uarch(0)->uarch) {
            // 4x8 and 1x8 both use the `prfm_cortex_a7` kernels.
577*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a5:
578*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a7:
579*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_krait:
580*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_kryo:
581*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
582*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
583*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
584*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
585*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
586*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
587*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
588*4bdc9457SAndroid Build Coastguard Worker               break;
            // Same `cortex_a7` kernels, but the variants without `prfm` in the name.
589*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a32:
590*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a35:
591*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7);
592*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7);
593*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
594*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
595*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
596*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
597*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
598*4bdc9457SAndroid Build Coastguard Worker               break;
            // 4x8 uses `prfm_cortex_a53`; 1x8 stays on `prfm_cortex_a7`.
599*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
600*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a57:
601*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a72:
602*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53);
603*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53);
604*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
605*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
606*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
607*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
608*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
609*4bdc9457SAndroid Build Coastguard Worker               break;
            // 4x8 uses `cortex_a53` and 1x8 uses `cortex_a7`, both without `prfm`.
610*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
611*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53);
612*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53);
613*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
614*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
615*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
616*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
617*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
618*4bdc9457SAndroid Build Coastguard Worker               break;
            // 4x8 uses `prfm_ld64`; 1x8 uses `prfm_cortex_a7`.
619*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m1:
620*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m2:
621*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m3:
622*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64);
623*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64);
624*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
625*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7);
626*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
627*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
628*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
629*4bdc9457SAndroid Build Coastguard Worker               break;
            // Any other micro-architecture: 4x8 `ld64`, 1x8 `cortex_a7` (no `prfm`).
630*4bdc9457SAndroid Build Coastguard Worker             default:
631*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
632*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64);
633*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
634*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7);
635*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
636*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
637*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 8;
638*4bdc9457SAndroid Build Coastguard Worker               break;
639*4bdc9457SAndroid Build Coastguard Worker           }
640*4bdc9457SAndroid Build Coastguard Worker         }
641*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
642*4bdc9457SAndroid Build Coastguard Worker         {
643*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
          // Snapshot the geometry chosen above. Overrides below apply only for 4x8 with
          // log2_kr == 0, so they never replace the neondot kernels (log2_kr == 2).
          // NOTE(review): the non-dot-product branches above never assign log2_kr, so
          // this relies on it already being 0 — presumably xnn_params starts
          // zero-initialized; confirm.
644*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.qu8.gemm.mr;
645*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.qu8.gemm.nr;
646*4bdc9457SAndroid Build Coastguard Worker           const uint32_t log2_kr = xnn_params.qu8.gemm.log2_kr;
          // Start at index 1: slot 0 keeps the kernels registered above.
647*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
648*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
649*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
650*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
651*4bdc9457SAndroid Build Coastguard Worker               break;
652*4bdc9457SAndroid Build Coastguard Worker             }
653*4bdc9457SAndroid Build Coastguard Worker 
            // Only Cortex-A53 and Cortex-A55r0 get a dedicated kernel in slot i; every
            // other micro-architecture leaves slot i untouched.
654*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
655*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
656*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
657*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53;
658*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53;
659*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7;
660*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7;
661*4bdc9457SAndroid Build Coastguard Worker                 }
662*4bdc9457SAndroid Build Coastguard Worker                 break;
663*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
664*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8 && log2_kr == 0) {
665*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53;
666*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53;
667*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7;
668*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7;
669*4bdc9457SAndroid Build Coastguard Worker                 }
670*4bdc9457SAndroid Build Coastguard Worker                 break;
671*4bdc9457SAndroid Build Coastguard Worker               default:
672*4bdc9457SAndroid Build Coastguard Worker                 break;
673*4bdc9457SAndroid Build Coastguard Worker             }
674*4bdc9457SAndroid Build Coastguard Worker           }
675*4bdc9457SAndroid Build Coastguard Worker         }
676*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
677*4bdc9457SAndroid Build Coastguard Worker       #else  // XNN_ENABLE_ASSEMBLY
        // Assembly kernels disabled: same dot-product test as above; otherwise fall
        // back to the `__neon_mlal_lane` kernels with MR=3 (and MR=1).
        // This half has no per-micro-architecture specialization.
678*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
679*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
680*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot);
681*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot);
682*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot);
683*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot);
684*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
685*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.mr = 4;
686*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.nr = 8;
687*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.log2_kr = 2;
688*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
689*4bdc9457SAndroid Build Coastguard Worker         } else {
          // Note the different row tile here: kernels are registered at MR index 3
          // (not 4) and mr is recorded as 3.
690*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane);
691*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane);
692*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane);
693*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane);
694*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
695*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.mr = 3;
696*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.nr = 8;
697*4bdc9457SAndroid Build Coastguard Worker         }
698*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
699*4bdc9457SAndroid Build Coastguard Worker 
      // QU8 depthwise convolution table: two unipass NEON `mul8` kernels.
      // The tile fields mirror the kernel names: up16x9 -> channel_tile 16 /
      // primary_tile 9, up8x25 -> channel_tile 8 / primary_tile 25.
      // Both entries share the same conv-params init callback.
700*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8;
701*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
702*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 16;
703*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].primary_tile = 9;
704*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8;
705*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
706*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 8;
707*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].primary_tile = 25;
708*4bdc9457SAndroid Build Coastguard Worker 
709*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.avgpool = (struct avgpool_parameters) {
        // QU8 average pooling on NEON. The tile fields mirror the kernel names:
        // 9x / 9p8x -> primary_tile 9 and incremental_tile 8, c8 -> channel_tile 8.
        // Members not named in this compound literal are zero-initialized.
710*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8,
711*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8,
712*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_neon_params,
713*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
714*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
715*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
716*4bdc9457SAndroid Build Coastguard Worker       };
717*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
        // QU8 global average pooling (`rndnu` kernels): 7x / 7p7x -> row_tile 7,
        // c8 -> channel_tile 8. Unlike avgpool above, this entry also registers an
        // `update` callback.
718*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8,
719*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8,
720*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_rndnu_neon_params,
721*4bdc9457SAndroid Build Coastguard Worker         .update.qu8 = xnn_update_qu8_avgpool_minmax_rndnu_neon_params,
722*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
723*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
724*4bdc9457SAndroid Build Coastguard Worker       };
725*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
        // QU8 element-wise addition. `op` takes the vadd kernel; both `opc` and
        // `ropc` are bound to the same vaddc kernel.
726*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
727*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16,
728*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16,
729*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_neon_params,
        // NOTE(review): element_tile is 8 although the kernels bound above carry the
        // `_x16` suffix, and qu8.vmul just below pairs `_x16` kernels with 16.
        // Possibly a leftover from an `_x8` kernel — confirm how element_tile is
        // consumed before changing it.
730*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
731*4bdc9457SAndroid Build Coastguard Worker       };
732*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vmul = (struct vbinary_parameters) {
        // QU8 element-wise multiplication (`rndnu` kernels); `opc` and `ropc` share
        // one vmulc kernel and element_tile matches the `_x16` suffix.
733*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x16,
734*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
735*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
736*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_mul = xnn_init_qu8_mul_minmax_rndnu_neon_params,
737*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
738*4bdc9457SAndroid Build Coastguard Worker       };
739*4bdc9457SAndroid Build Coastguard Worker 
740*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
        // QU8 leaky ReLU: NEON `_x32` kernel, element_tile 32 to match.
741*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__neon_x32,
742*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_neon_params,
743*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
744*4bdc9457SAndroid Build Coastguard Worker       };
745*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QU8_OPERATORS
746*4bdc9457SAndroid Build Coastguard Worker 
747*4bdc9457SAndroid Build Coastguard Worker     /**************************** S8 AArch32 micro-kernels ****************************/
    // Registers the S8 clamp, bilinear-interpolation and max-pooling micro-kernels.
    // The whole section is compiled out when XNN_NO_S8_OPERATORS is defined.
748*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_S8_OPERATORS
      // Record S8 in init_flags — presumably consulted later to tell whether S8
      // operators are available; confirm against the readers of init_flags.
749*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_S8;
750*4bdc9457SAndroid Build Coastguard Worker 
751*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.clamp = (struct vunary_parameters) {
        // Clamp: NEON `_x64` kernel, element_tile 64 to match.
752*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__neon_x64,
753*4bdc9457SAndroid Build Coastguard Worker         .init.s8_minmax = xnn_init_s8_minmax_neon_params,
754*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
755*4bdc9457SAndroid Build Coastguard Worker       };
756*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
        // Bilinear interpolation: `c8` kernel -> channel_tile 8, pixel_tile 1.
757*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__neon_c8,
758*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
759*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
760*4bdc9457SAndroid Build Coastguard Worker       };
761*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.maxpool = (struct maxpool_parameters) {
        // Max pooling: mr 9 / qr 8 mirror the `9p8x` in the kernel name. It uses the
        // same minmax-params init callback as clamp above.
762*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__neon_c16,
763*4bdc9457SAndroid Build Coastguard Worker         .init.s8 = xnn_init_s8_minmax_neon_params,
764*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
765*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
766*4bdc9457SAndroid Build Coastguard Worker       };
767*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_S8_OPERATORS
768*4bdc9457SAndroid Build Coastguard Worker 
769*4bdc9457SAndroid Build Coastguard Worker     /**************************** U8 AArch32 micro-kernels ****************************/
    // Registers the U8 clamp, bilinear-interpolation, max-pooling, rmax and
    // lut32norm micro-kernels.
    // The whole section is compiled out when XNN_NO_U8_OPERATORS is defined.
770*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_U8_OPERATORS
      // Record U8 in init_flags — presumably consulted later to tell whether U8
      // operators are available; confirm against the readers of init_flags.
771*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_U8;
772*4bdc9457SAndroid Build Coastguard Worker 
773*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.clamp = (struct vunary_parameters) {
        // Clamp: NEON `_x64` kernel, element_tile 64 to match.
774*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__neon_x64,
775*4bdc9457SAndroid Build Coastguard Worker         .init.u8_minmax = xnn_init_u8_minmax_neon_params,
776*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
777*4bdc9457SAndroid Build Coastguard Worker       };
778*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
        // Bilinear interpolation: `c8` kernel -> channel_tile 8, pixel_tile 1.
779*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__neon_c8,
780*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
781*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
782*4bdc9457SAndroid Build Coastguard Worker       };
783*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.maxpool = (struct maxpool_parameters) {
        // Max pooling: mr 9 / qr 8 mirror the `9p8x` in the kernel name.
784*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16,
785*4bdc9457SAndroid Build Coastguard Worker         .init.u8 = xnn_init_u8_minmax_neon_params,
786*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
787*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
788*4bdc9457SAndroid Build Coastguard Worker       };
      // rmax gets a NEON kernel, while lut32norm is bound to a `__scalar` kernel.
789*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
790*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
791*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_U8_OPERATORS
792*4bdc9457SAndroid Build Coastguard Worker 
793*4bdc9457SAndroid Build Coastguard Worker     /**************************** X8 AArch32 micro-kernels ****************************/
    // Registers the X8 lookup-table, zip and transpose micro-kernels.
    // The whole section is compiled out when XNN_NO_X8_OPERATORS is defined.
794*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X8_OPERATORS
      // Record X8 in init_flags — presumably consulted later to tell whether X8
      // operators are available; confirm against the readers of init_flags.
795*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X8;
796*4bdc9457SAndroid Build Coastguard Worker 
      // The lookup-table kernel is a `__scalar_x4` variant; the zip kernels below
      // are NEON.
797*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;
798*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.zip = (struct zip_parameters) {
        // x2/x3/x4 are cast to the zipc function type; xm uses the distinct zipv
        // function type.
799*4bdc9457SAndroid Build Coastguard Worker         .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
800*4bdc9457SAndroid Build Coastguard Worker         .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
801*4bdc9457SAndroid Build Coastguard Worker         .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
802*4bdc9457SAndroid Build Coastguard Worker         .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
803*4bdc9457SAndroid Build Coastguard Worker       };
804*4bdc9457SAndroid Build Coastguard Worker 
805*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.transpose = (struct transpose_parameters) {
        // Transpose: `16x16` NEON kernel registered with tile_size 32.
        // NOTE(review): tile_size (32) differs from the 16x16 in the kernel name —
        // presumably a blocking size rather than the kernel tile; confirm.
806*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon,
807*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
808*4bdc9457SAndroid Build Coastguard Worker       };
809*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X8_OPERATORS
810*4bdc9457SAndroid Build Coastguard Worker 
811*4bdc9457SAndroid Build Coastguard Worker     /**************************** X16 AArch32 micro-kernels ****************************/
    // Registers the X16 transpose micro-kernel.
    // The whole section is compiled out when XNN_NO_X16_OPERATORS is defined.
812*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X16_OPERATORS
      // Record X16 in init_flags — presumably consulted later to tell whether X16
      // operators are available; confirm against the readers of init_flags.
813*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X16;
814*4bdc9457SAndroid Build Coastguard Worker 
815*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x16.transpose = (struct transpose_parameters) {
        // Transpose: `8x8` NEON kernel registered with tile_size 32.
        // NOTE(review): tile_size (32) differs from the 8x8 in the kernel name —
        // presumably a blocking size rather than the kernel tile; confirm.
816*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon,
817*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
818*4bdc9457SAndroid Build Coastguard Worker       };
819*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X16_OPERATORS
820*4bdc9457SAndroid Build Coastguard Worker 
821*4bdc9457SAndroid Build Coastguard Worker     /**************************** F32 AArch32 micro-kernels ****************************/
822*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_F32_OPERATORS
823*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_F32;
824*4bdc9457SAndroid Build Coastguard Worker 
825*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
826*4bdc9457SAndroid Build Coastguard Worker         switch (cpuinfo_get_uarch(0)->uarch) {
827*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a5:
828*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a7:
829*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_krait:
830*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_kryo:
831*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7);
832*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7);
833*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
834*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
835*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
836*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
837*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
838*4bdc9457SAndroid Build Coastguard Worker             break;
839*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a53:
840*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53);
841*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53);
842*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
843*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
844*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
845*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
846*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
847*4bdc9457SAndroid Build Coastguard Worker             break;
848*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a55r0:
849*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53);
850*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53);
851*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
852*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
853*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
854*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
855*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
856*4bdc9457SAndroid Build Coastguard Worker             break;
857*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a32:
858*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a35:
859*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a55:
860*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55);
861*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55);
862*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
863*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
864*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
865*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
866*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
867*4bdc9457SAndroid Build Coastguard Worker             break;
868*4bdc9457SAndroid Build Coastguard Worker 
869*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a57:
870*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a72:
871*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a73:
872*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75);
873*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75);
874*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
875*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
876*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
877*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
878*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
879*4bdc9457SAndroid Build Coastguard Worker             break;
880*4bdc9457SAndroid Build Coastguard Worker 
881*4bdc9457SAndroid Build Coastguard Worker           default:
882*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75);
883*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75);
884*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
885*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
886*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
887*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.mr = 4;
888*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.nr = 8;
889*4bdc9457SAndroid Build Coastguard Worker             #if XNN_ENABLE_JIT
890*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f32.gemm.generator.gemm = xnn_init_hmp_gemm_codegen(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75);
891*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f32.gemm.generator.igemm = xnn_init_hmp_igemm_codegen(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75);
892*4bdc9457SAndroid Build Coastguard Worker             #endif
893*4bdc9457SAndroid Build Coastguard Worker             break;
894*4bdc9457SAndroid Build Coastguard Worker         }
895*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
896*4bdc9457SAndroid Build Coastguard Worker         {
897*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
898*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.f32.gemm.mr;
899*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.f32.gemm.nr;
900*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
901*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
902*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
903*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
904*4bdc9457SAndroid Build Coastguard Worker               break;
905*4bdc9457SAndroid Build Coastguard Worker             }
906*4bdc9457SAndroid Build Coastguard Worker 
907*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
908*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
909*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8) {
910*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53;
911*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53;
912*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64;
913*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64;
914*4bdc9457SAndroid Build Coastguard Worker                 }
915*4bdc9457SAndroid Build Coastguard Worker                 break;
916*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
917*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8) {
918*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53;
919*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53;
920*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64;
921*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64;
922*4bdc9457SAndroid Build Coastguard Worker                 }
923*4bdc9457SAndroid Build Coastguard Worker                 break;
924*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
925*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 8) {
926*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55;
927*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55;
928*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64;
929*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64;
930*4bdc9457SAndroid Build Coastguard Worker                 }
931*4bdc9457SAndroid Build Coastguard Worker                 break;
932*4bdc9457SAndroid Build Coastguard Worker               default:
933*4bdc9457SAndroid Build Coastguard Worker                 break;
934*4bdc9457SAndroid Build Coastguard Worker             }
935*4bdc9457SAndroid Build Coastguard Worker           }
936*4bdc9457SAndroid Build Coastguard Worker         }
937*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
938*4bdc9457SAndroid Build Coastguard Worker       #else  // XNN_ENABLE_ASSEMBLY
939*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128);
940*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128);
941*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64);
942*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64);
943*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
944*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.mr = 4;
945*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.nr = 8;
946*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
947*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64);
948*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64);
949*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
950*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.mr = 4;
951*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.nr = 2;
952*4bdc9457SAndroid Build Coastguard Worker 
953*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x3__neon;
954*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
955*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 8,
956*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3,
957*4bdc9457SAndroid Build Coastguard Worker 
958*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x4__neon;
959*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
960*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 8,
961*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4,
962*4bdc9457SAndroid Build Coastguard Worker 
963*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__neon;
964*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
965*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 8;
966*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
967*4bdc9457SAndroid Build Coastguard Worker 
968*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2;
969*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
970*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 8;
971*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
972*4bdc9457SAndroid Build Coastguard Worker 
973*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.avgpool = (struct avgpool_parameters) {
974*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__neon_c4,
975*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4,
976*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
977*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
978*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
979*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
980*4bdc9457SAndroid Build Coastguard Worker       };
981*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
982*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__neon_c4,
983*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4,
984*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
985*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
986*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
987*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
988*4bdc9457SAndroid Build Coastguard Worker       };
989*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
990*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__neon_c4,
991*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4,
992*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
993*4bdc9457SAndroid Build Coastguard Worker         .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
994*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
995*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
996*4bdc9457SAndroid Build Coastguard Worker       };
997*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.maxpool = (struct maxpool_parameters) {
998*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4,
999*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
1000*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1001*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1002*4bdc9457SAndroid Build Coastguard Worker       };
1003*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1004*4bdc9457SAndroid Build Coastguard Worker         .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__neon_c4,
1005*4bdc9457SAndroid Build Coastguard Worker         .mr = 4,
1006*4bdc9457SAndroid Build Coastguard Worker       };
1007*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1008*4bdc9457SAndroid Build Coastguard Worker         .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__neon_c4,
1009*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1010*4bdc9457SAndroid Build Coastguard Worker       };
1011*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1012*4bdc9457SAndroid Build Coastguard Worker         .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__neon_c4,
1013*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1014*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1015*4bdc9457SAndroid Build Coastguard Worker       };
1016*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
1017*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neon_c8,
1018*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
1019*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
1020*4bdc9457SAndroid Build Coastguard Worker       };
1021*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.abs = (struct vunary_parameters) {
1022*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__neon_x8,
1023*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1024*4bdc9457SAndroid Build Coastguard Worker       };
1025*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
1026*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__neon_x8,
1027*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1028*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1029*4bdc9457SAndroid Build Coastguard Worker       };
1030*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_fma()) {
1031*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.elu = (struct vunary_parameters) {
1032*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
1033*4bdc9457SAndroid Build Coastguard Worker           .init.f32_elu = xnn_init_f32_elu_neonfma_rr1_p6_params,
1034*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1035*4bdc9457SAndroid Build Coastguard Worker         };
1036*4bdc9457SAndroid Build Coastguard Worker       } else {
1037*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.elu = (struct vunary_parameters) {
1038*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8,
1039*4bdc9457SAndroid Build Coastguard Worker           .init.f32_elu = xnn_init_f32_elu_neon_rr2_lut16_p3_params,
1040*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1041*4bdc9457SAndroid Build Coastguard Worker         };
1042*4bdc9457SAndroid Build Coastguard Worker       }
1043*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
1044*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__neon_x16,
1045*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
1046*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
1047*4bdc9457SAndroid Build Coastguard Worker       };
1048*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
1049*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__neon_x8,
1050*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,
1051*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1052*4bdc9457SAndroid Build Coastguard Worker       };
1053*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.neg = (struct vunary_parameters) {
1054*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__neon_x8,
1055*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1056*4bdc9457SAndroid Build Coastguard Worker       };
1057*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_v8()) {
1058*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndne = (struct vunary_parameters) {
1059*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__neonv8_x8,
1060*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1061*4bdc9457SAndroid Build Coastguard Worker         };
1062*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndz = (struct vunary_parameters) {
1063*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__neonv8_x8,
1064*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1065*4bdc9457SAndroid Build Coastguard Worker         };
1066*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndu = (struct vunary_parameters) {
1067*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__neonv8_x8,
1068*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1069*4bdc9457SAndroid Build Coastguard Worker         };
1070*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndd = (struct vunary_parameters) {
1071*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__neonv8_x8,
1072*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1073*4bdc9457SAndroid Build Coastguard Worker         };
1074*4bdc9457SAndroid Build Coastguard Worker       } else {
1075*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndne = (struct vunary_parameters) {
1076*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__neon_x8,
1077*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1078*4bdc9457SAndroid Build Coastguard Worker         };
1079*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndz = (struct vunary_parameters) {
1080*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__neon_x8,
1081*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1082*4bdc9457SAndroid Build Coastguard Worker         };
1083*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndu = (struct vunary_parameters) {
1084*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__neon_x8,
1085*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1086*4bdc9457SAndroid Build Coastguard Worker         };
1087*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.rndd = (struct vunary_parameters) {
1088*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__neon_x8,
1089*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1090*4bdc9457SAndroid Build Coastguard Worker         };
1091*4bdc9457SAndroid Build Coastguard Worker       }
1092*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
1093*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
1094*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params,
1095*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1096*4bdc9457SAndroid Build Coastguard Worker       };
1097*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqr = (struct vunary_parameters) {
1098*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__neon_x8,
1099*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1100*4bdc9457SAndroid Build Coastguard Worker       };
1101*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqrt = (struct vunary_parameters) {
1102*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1,
1103*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1104*4bdc9457SAndroid Build Coastguard Worker       };
1105*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
1106*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
1107*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
1108*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
1109*4bdc9457SAndroid Build Coastguard Worker       };
1110*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
1111*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8,
1112*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_expminus_neon_rr2_lut64_p2_params,
1113*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1114*4bdc9457SAndroid Build Coastguard Worker       };
1115*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__neon;
1116*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
1117*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__neon_x8,
1118*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__neon_x8,
1119*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__neon_x8,
1120*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1121*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1122*4bdc9457SAndroid Build Coastguard Worker       };
1123*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
1124*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__scalar_x2,
1125*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__scalar_x2,
1126*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__scalar_x2,
1127*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1128*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 2,
1129*4bdc9457SAndroid Build Coastguard Worker       };
1130*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
1131*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
1132*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
1133*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
1134*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1135*4bdc9457SAndroid Build Coastguard Worker       };
1136*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
1137*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__neon_x8,
1138*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
1139*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
1140*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1141*4bdc9457SAndroid Build Coastguard Worker       };
1142*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
1143*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__neon_x8,
1144*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__neon_x8,
1145*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__neon_x8,
1146*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1147*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1148*4bdc9457SAndroid Build Coastguard Worker       };
1149*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
1150*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__neon_x8,
1151*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__neon_x8,
1152*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__neon_x8,
1153*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1154*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1155*4bdc9457SAndroid Build Coastguard Worker       };
1156*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
1157*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__neon_x8,
1158*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__neon_x8,
1159*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__neon_x8,
1160*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1161*4bdc9457SAndroid Build Coastguard Worker       };
1162*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
1163*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x,
1164*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
1165*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
1166*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
1167*4bdc9457SAndroid Build Coastguard Worker       };
1168*4bdc9457SAndroid Build Coastguard Worker       #ifndef XNN_NO_NCHW_OPERATORS
1169*4bdc9457SAndroid Build Coastguard Worker         init_flags |= XNN_INIT_FLAG_CHW_OPT;
1170*4bdc9457SAndroid Build Coastguard Worker 
1171*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm = (struct spmm_parameters) {
1172*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x1__neon,
1173*4bdc9457SAndroid Build Coastguard Worker           .mr = 32,
1174*4bdc9457SAndroid Build Coastguard Worker           .nr = 1,
1175*4bdc9457SAndroid Build Coastguard Worker         };
1176*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
1177*4bdc9457SAndroid Build Coastguard Worker           .ukernel_with_symm_padding =
1178*4bdc9457SAndroid Build Coastguard Worker             (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2,
1179*4bdc9457SAndroid Build Coastguard Worker           .output_channel_tile = 4,
1180*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
1181*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 2,
1182*4bdc9457SAndroid Build Coastguard Worker         };
1183*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
1184*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_2x4,
1185*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
1186*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
1187*4bdc9457SAndroid Build Coastguard Worker         };
1188*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
1189*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4,
1190*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
1191*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
1192*4bdc9457SAndroid Build Coastguard Worker         };
1193*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
1194*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4,
1195*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
1196*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
1197*4bdc9457SAndroid Build Coastguard Worker         };
1198*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
1199*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_1x4,
1200*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
1201*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
1202*4bdc9457SAndroid Build Coastguard Worker         };
1203*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
1204*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__neon_x4,
1205*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 4,
1206*4bdc9457SAndroid Build Coastguard Worker         };
1207*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
1208*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__neon_p8,
1209*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 1,
1210*4bdc9457SAndroid Build Coastguard Worker           .pixel_tile = 8,
1211*4bdc9457SAndroid Build Coastguard Worker         };
1212*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_NO_NCHW_OPERATORS
1213*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_F32_OPERATORS
1214*4bdc9457SAndroid Build Coastguard Worker 
1215*4bdc9457SAndroid Build Coastguard Worker     /*************************** VCVT AArch32 micro-kernels ***************************/
1216*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_VCVT_OPERATORS
1217*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_VCVT;
1218*4bdc9457SAndroid Build Coastguard Worker 
1219*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_fp16()) {
1220*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
1221*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__neonfp16_x16,
1222*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
1223*4bdc9457SAndroid Build Coastguard Worker         };
1224*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
1225*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__neonfp16_x16,
1226*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
1227*4bdc9457SAndroid Build Coastguard Worker         };
1228*4bdc9457SAndroid Build Coastguard Worker       } else {
1229*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
1230*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__neon_int16_x16,
1231*4bdc9457SAndroid Build Coastguard Worker           .init.f16_f32_cvt = xnn_init_f16_f32_cvt_neon_params,
1232*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
1233*4bdc9457SAndroid Build Coastguard Worker         };
1234*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
1235*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__neon_x8,
1236*4bdc9457SAndroid Build Coastguard Worker           .init.f32_f16_cvt = xnn_init_f32_f16_cvt_neon_params,
1237*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
1238*4bdc9457SAndroid Build Coastguard Worker         };
1239*4bdc9457SAndroid Build Coastguard Worker       }
1240*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_v8()) {
1241*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
1242*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__neonv8_x32,
1243*4bdc9457SAndroid Build Coastguard Worker           .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neonv8_params,
1244*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
1245*4bdc9457SAndroid Build Coastguard Worker         };
1246*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
1247*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__neonv8_x32,
1248*4bdc9457SAndroid Build Coastguard Worker           .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neonv8_params,
1249*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
1250*4bdc9457SAndroid Build Coastguard Worker         };
1251*4bdc9457SAndroid Build Coastguard Worker       } else {
1252*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
1253*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__neon_x32,
1254*4bdc9457SAndroid Build Coastguard Worker           .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neon_params,
1255*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
1256*4bdc9457SAndroid Build Coastguard Worker         };
1257*4bdc9457SAndroid Build Coastguard Worker         xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
1258*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__neon_x32,
1259*4bdc9457SAndroid Build Coastguard Worker           .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neon_params,
1260*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
1261*4bdc9457SAndroid Build Coastguard Worker         };
1262*4bdc9457SAndroid Build Coastguard Worker       }
1263*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
1264*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__neon_x32,
1265*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_neon_params,
1266*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
1267*4bdc9457SAndroid Build Coastguard Worker       };
1268*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
1269*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__neon_x32,
1270*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_neon_params,
1271*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
1272*4bdc9457SAndroid Build Coastguard Worker       };
1273*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
1274*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__neon_x32,
1275*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_neon_params,
1276*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
1277*4bdc9457SAndroid Build Coastguard Worker       };
1278*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
1279*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__neon_x32,
1280*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_neon_params,
1281*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
1282*4bdc9457SAndroid Build Coastguard Worker       };
1283*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_VCVT_OPERATORS
1284*4bdc9457SAndroid Build Coastguard Worker 
1285*4bdc9457SAndroid Build Coastguard Worker     /**************************** X32 AArch32 micro-kernels ****************************/
1286*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X32_OPERATORS
1287*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X32;
1288*4bdc9457SAndroid Build Coastguard Worker 
1289*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__neon;
1290*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.zip = (struct zip_parameters) {
1291*4bdc9457SAndroid Build Coastguard Worker         .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
1292*4bdc9457SAndroid Build Coastguard Worker         .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
1293*4bdc9457SAndroid Build Coastguard Worker         .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
1294*4bdc9457SAndroid Build Coastguard Worker         .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
1295*4bdc9457SAndroid Build Coastguard Worker       };
1296*4bdc9457SAndroid Build Coastguard Worker 
1297*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.transpose = (struct transpose_parameters) {
1298*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon,
1299*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
1300*4bdc9457SAndroid Build Coastguard Worker       };
1301*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X32_OPERATORS
1302*4bdc9457SAndroid Build Coastguard Worker 
1303*4bdc9457SAndroid Build Coastguard Worker     /**************************** XX AArch32 micro-kernels ****************************/
1304*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_XX_OPERATORS
1305*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_XX;
1306*4bdc9457SAndroid Build Coastguard Worker 
1307*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
1308*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.fill = (struct fill_parameters) {
1309*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__neon_x64,
1310*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 1,
1311*4bdc9457SAndroid Build Coastguard Worker       };
1312*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.pad = (struct pad_parameters) {
1313*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__neon,
1314*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 1,
1315*4bdc9457SAndroid Build Coastguard Worker       };
1316*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.transpose = (struct transpose_parameters) {
1317*4bdc9457SAndroid Build Coastguard Worker         .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
1318*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
1319*4bdc9457SAndroid Build Coastguard Worker       };
1320*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_XX_OPERATORS
1321*4bdc9457SAndroid Build Coastguard Worker 
1322*4bdc9457SAndroid Build Coastguard Worker   } else if (!XNN_PLATFORM_MOBILE) {
1323*4bdc9457SAndroid Build Coastguard Worker 
1324*4bdc9457SAndroid Build Coastguard Worker     /*************************** QC8 AArch32 Pre-NEON micro-kernels ***************************/
1325*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QC8_OPERATORS
1326*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QC8;
1327*4bdc9457SAndroid Build Coastguard Worker 
1328*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1329*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1330*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1331*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1332*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_armsimd32_params;
1333*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 2;
1334*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 2;
1335*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 2;
1336*4bdc9457SAndroid Build Coastguard Worker 
1337*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up1x3__scalar_fmagic;
1338*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
1339*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 1;
1340*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].primary_tile = 3;
1341*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic;
1342*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
1343*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 1;
1344*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].primary_tile = 9;
1345*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic;
1346*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
1347*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 1;
1348*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].primary_tile = 25;
1349*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QC8_OPERATORS
1350*4bdc9457SAndroid Build Coastguard Worker 
1351*4bdc9457SAndroid Build Coastguard Worker     /*************************** QS8 AArch32 Pre-NEON micro-kernels ***************************/
1352*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QS8_OPERATORS
1353*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QS8;
1354*4bdc9457SAndroid Build Coastguard Worker 
1355*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1356*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1357*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1358*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1359*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_armsimd32_params;
1360*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 2;
1361*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 2;
1362*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 2;
1363*4bdc9457SAndroid Build Coastguard Worker 
1364*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic;
1365*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params;
1366*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 1;
1367*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].primary_tile = 9;
1368*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic;
1369*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params;
1370*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 1;
1371*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].primary_tile = 25;
1372*4bdc9457SAndroid Build Coastguard Worker 
1373*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
1374*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c1,
1375*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1,
1376*4bdc9457SAndroid Build Coastguard Worker         .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_scalar_imagic_params,
1377*4bdc9457SAndroid Build Coastguard Worker         .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_scalar_imagic_params,
1378*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
1379*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1380*4bdc9457SAndroid Build Coastguard Worker       };
1381*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
1382*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__scalar_x1,
1383*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x1,
1384*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x1,
1385*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_scalar_params,
1386*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1387*4bdc9457SAndroid Build Coastguard Worker       };
1388*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vmul = (struct vbinary_parameters) {
1389*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
1390*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
1391*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
1392*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params,
1393*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1394*4bdc9457SAndroid Build Coastguard Worker       };
1395*4bdc9457SAndroid Build Coastguard Worker 
1396*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
1397*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__armsimd32_x4,
1398*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_armsimd32_params,
1399*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1400*4bdc9457SAndroid Build Coastguard Worker       };
1401*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QS8_OPERATORS
1402*4bdc9457SAndroid Build Coastguard Worker 
1403*4bdc9457SAndroid Build Coastguard Worker     /*************************** QU8 AArch32 Pre-NEON micro-kernels ***************************/
1404*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_QU8_OPERATORS
1405*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_QU8;
1406*4bdc9457SAndroid Build Coastguard Worker 
1407*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1408*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32);
1409*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1410*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32);
1411*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_armsimd32_params;
1412*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 2;
1413*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 2;
1414*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 2;
1415*4bdc9457SAndroid Build Coastguard Worker 
1416*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic;
1417*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params;
1418*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 1;
1419*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].primary_tile = 9;
1420*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic;
1421*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params;
1422*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 1;
1423*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].primary_tile = 25;
1424*4bdc9457SAndroid Build Coastguard Worker 
1425*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.avgpool = (struct avgpool_parameters) {
1426*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1,
1427*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1,
1428*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_scalar_params,
1429*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
1430*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
1431*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1432*4bdc9457SAndroid Build Coastguard Worker       };
1433*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
1434*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c1,
1435*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1,
1436*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_scalar_imagic_params,
1437*4bdc9457SAndroid Build Coastguard Worker         .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_scalar_imagic_params,
1438*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
1439*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1440*4bdc9457SAndroid Build Coastguard Worker       };
1441*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
1442*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__scalar_x1,
1443*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
1444*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
1445*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_scalar_params,
1446*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1447*4bdc9457SAndroid Build Coastguard Worker       };
1448*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vmul = (struct vbinary_parameters) {
1449*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4,
1450*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
1451*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
1452*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params,
1453*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1454*4bdc9457SAndroid Build Coastguard Worker       };
1455*4bdc9457SAndroid Build Coastguard Worker 
1456*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
1457*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__armsimd32_x4,
1458*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_armsimd32_params,
1459*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1460*4bdc9457SAndroid Build Coastguard Worker       };
1461*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_QU8_OPERATORS
1462*4bdc9457SAndroid Build Coastguard Worker 
1463*4bdc9457SAndroid Build Coastguard Worker     /**************************** S8 AArch32 Pre-NEON micro-kernels ****************************/
1464*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_S8_OPERATORS  // Whole section is compiled out when XNN_NO_S8_OPERATORS is defined.
1465*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_S8;  // Set the S8 bit in init_flags alongside the s8 table entries assigned below.
1466*4bdc9457SAndroid Build Coastguard Worker 
1467*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.clamp = (struct vunary_parameters) {  // Whole-struct assignment: members not named in the literal are zero-initialized.
1468*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__scalar_x4,  // Scalar clamp kernel, cast to the generic unary kernel pointer type.
1469*4bdc9457SAndroid Build Coastguard Worker         .init.s8_minmax = xnn_init_s8_minmax_scalar_params,  // Parameter-init function stored next to the kernel.
1470*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,  // Matches the _x4 suffix of the kernel name.
1471*4bdc9457SAndroid Build Coastguard Worker       };
1472*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.ibilinear = (struct ibilinear_parameters) {  // S8 ibilinear entry: scalar kernel plus its tile sizes.
1473*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__scalar_c1,
1474*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
1475*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,  // Matches the _c1 suffix of the kernel name.
1476*4bdc9457SAndroid Build Coastguard Worker       };
1477*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.maxpool = (struct maxpool_parameters) {  // S8 max-pooling entry; shares the s8 minmax init function with clamp above.
1478*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__scalar_c1,
1479*4bdc9457SAndroid Build Coastguard Worker         .init.s8 = xnn_init_s8_minmax_scalar_params,
1480*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,  // mr/qr mirror the "9p8x" in the kernel name (9 and 8).
1481*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1482*4bdc9457SAndroid Build Coastguard Worker       };
1483*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_S8_OPERATORS
1484*4bdc9457SAndroid Build Coastguard Worker 
1485*4bdc9457SAndroid Build Coastguard Worker     /**************************** U8 AArch32 Pre-NEON micro-kernels ****************************/
1486*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_U8_OPERATORS  // Whole section is compiled out when XNN_NO_U8_OPERATORS is defined.
1487*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_U8;  // Set the U8 bit in init_flags alongside the u8 table entries assigned below.
1488*4bdc9457SAndroid Build Coastguard Worker 
1489*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.clamp = (struct vunary_parameters) {  // Whole-struct assignment: members not named in the literal are zero-initialized.
1490*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__scalar_x4,  // Scalar clamp kernel, cast to the generic unary kernel pointer type.
1491*4bdc9457SAndroid Build Coastguard Worker         .init.u8_minmax = xnn_init_u8_minmax_scalar_params,  // Parameter-init function stored next to the kernel.
1492*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,  // Matches the _x4 suffix of the kernel name.
1493*4bdc9457SAndroid Build Coastguard Worker       };
1494*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.ibilinear = (struct ibilinear_parameters) {  // U8 ibilinear entry: scalar kernel plus its tile sizes.
1495*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__scalar_c1,
1496*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
1497*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,  // Matches the _c1 suffix of the kernel name.
1498*4bdc9457SAndroid Build Coastguard Worker       };
1499*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.maxpool = (struct maxpool_parameters) {  // U8 max-pooling entry; shares the u8 minmax init function with clamp above.
1500*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1,
1501*4bdc9457SAndroid Build Coastguard Worker         .init.u8 = xnn_init_u8_minmax_scalar_params,
1502*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,  // mr/qr mirror the "9p8x" in the kernel name (9 and 8).
1503*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1504*4bdc9457SAndroid Build Coastguard Worker       };
1505*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;  // Bare function pointer: no cast, no tile metadata stored for this entry.
1506*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;  // Likewise assigned directly without a cast.
1507*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_U8_OPERATORS
1508*4bdc9457SAndroid Build Coastguard Worker 
1509*4bdc9457SAndroid Build Coastguard Worker     /**************************** X8 AArch32 Pre-NEON micro-kernels ****************************/
1510*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X8_OPERATORS  // Whole section is compiled out when XNN_NO_X8_OPERATORS is defined.
1511*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X8;  // Set the X8 bit in init_flags alongside the x8 table entries assigned below.
1512*4bdc9457SAndroid Build Coastguard Worker 
1513*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;  // Bare function pointer: no cast, no tile metadata stored for this entry.
1514*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.zip = (struct zip_parameters) {  // Whole-struct assignment: members not named in the literal are zero-initialized.
1515*4bdc9457SAndroid Build Coastguard Worker         .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,  // x2/x3/x4 share the xnn_zipc_ukernel_function pointer type.
1516*4bdc9457SAndroid Build Coastguard Worker         .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
1517*4bdc9457SAndroid Build Coastguard Worker         .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
1518*4bdc9457SAndroid Build Coastguard Worker         .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,  // xm uses the distinct xnn_zipv_ukernel_function pointer type.
1519*4bdc9457SAndroid Build Coastguard Worker       };
1520*4bdc9457SAndroid Build Coastguard Worker 
1521*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.transpose = (struct transpose_parameters) {  // X8 transpose entry: scalar kernel plus a tile size.
1522*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__2x4_scalar_int,
1523*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,  // NOTE(review): units/meaning of tile_size are defined outside this section — confirm against struct transpose_parameters.
1524*4bdc9457SAndroid Build Coastguard Worker       };
1525*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X8_OPERATORS
1526*4bdc9457SAndroid Build Coastguard Worker 
1527*4bdc9457SAndroid Build Coastguard Worker     /**************************** X16 AArch32 Pre-NEON micro-kernels ****************************/
1528*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X16_OPERATORS  // Whole section is compiled out when XNN_NO_X16_OPERATORS is defined.
1529*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X16;  // Set the X16 bit in init_flags alongside the x16 table entry assigned below.
1530*4bdc9457SAndroid Build Coastguard Worker 
1531*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x16.transpose = (struct transpose_parameters) {  // Whole-struct assignment: members not named in the literal are zero-initialized.
1532*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__2x4_scalar_int,  // Scalar transpose kernel, cast to the generic transposec pointer type.
1533*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,  // Same tile_size value as the x8 transpose entry.
1534*4bdc9457SAndroid Build Coastguard Worker       };
1535*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X16_OPERATORS
1536*4bdc9457SAndroid Build Coastguard Worker 
1537*4bdc9457SAndroid Build Coastguard Worker     /**************************** F32 AArch32 Pre-NEON micro-kernels ****************************/
1538*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_F32_OPERATORS
1539*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_F32;
1540*4bdc9457SAndroid Build Coastguard Worker 
1541*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x4__scalar);
1542*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x4__scalar);
1543*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x4__scalar);
1544*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x4__scalar);
1545*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_4x4__scalar);
1546*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_4x4__scalar);
1547*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x4__scalar);
1548*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x4__scalar);
1549*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar);
1550*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar);
1551*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar);
1552*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar);
1553*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
1554*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 4;
1555*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 4;
1556*4bdc9457SAndroid Build Coastguard Worker 
1557*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__scalar);
1558*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__scalar);
1559*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__scalar);
1560*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar);
1561*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
1562*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.mr = 4;
1563*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.nr = 2;
1564*4bdc9457SAndroid Build Coastguard Worker 
1565*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x3__scalar_acc2;
1566*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x3__scalar_acc2;
1567*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
1568*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 1;
1569*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
1570*4bdc9457SAndroid Build Coastguard Worker 
1571*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x4__scalar_acc2;
1572*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2;
1573*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
1574*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 1;
1575*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
1576*4bdc9457SAndroid Build Coastguard Worker 
1577*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2;
1578*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2;
1579*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
1580*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 1;
1581*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
1582*4bdc9457SAndroid Build Coastguard Worker 
1583*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2;
1584*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2;
1585*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
1586*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 1;
1587*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
1588*4bdc9457SAndroid Build Coastguard Worker 
1589*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.avgpool = (struct avgpool_parameters) {
1590*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__scalar_c1,
1591*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1,
1592*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
1593*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
1594*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
1595*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1596*4bdc9457SAndroid Build Coastguard Worker       };
1597*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
1598*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__scalar_c1,
1599*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1,
1600*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
1601*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
1602*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
1603*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1604*4bdc9457SAndroid Build Coastguard Worker       };
1605*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
1606*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__scalar_c1,
1607*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1,
1608*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
1609*4bdc9457SAndroid Build Coastguard Worker         .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
1610*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
1611*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1612*4bdc9457SAndroid Build Coastguard Worker       };
1613*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.maxpool = (struct maxpool_parameters) {
1614*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__scalar_c1,
1615*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
1616*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1617*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1618*4bdc9457SAndroid Build Coastguard Worker       };
1619*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
1620*4bdc9457SAndroid Build Coastguard Worker         .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
1621*4bdc9457SAndroid Build Coastguard Worker         .mr = 4,
1622*4bdc9457SAndroid Build Coastguard Worker       };
1623*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
1624*4bdc9457SAndroid Build Coastguard Worker         .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
1625*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1626*4bdc9457SAndroid Build Coastguard Worker       };
1627*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
1628*4bdc9457SAndroid Build Coastguard Worker         .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
1629*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
1630*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
1631*4bdc9457SAndroid Build Coastguard Worker       };
1632*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
1633*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__scalar_c2,
1634*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
1635*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 2,
1636*4bdc9457SAndroid Build Coastguard Worker       };
1637*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.abs = (struct vunary_parameters) {
1638*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__scalar_x4,
1639*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1640*4bdc9457SAndroid Build Coastguard Worker       };
1641*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
1642*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__scalar_x4,
1643*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1644*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1645*4bdc9457SAndroid Build Coastguard Worker       };
1646*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
1647*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
1648*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params,
1649*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1650*4bdc9457SAndroid Build Coastguard Worker       };
1651*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
1652*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__scalar_x4,
1653*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
1654*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1655*4bdc9457SAndroid Build Coastguard Worker       };
1656*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
1657*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__scalar_x4,
1658*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,
1659*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1660*4bdc9457SAndroid Build Coastguard Worker       };
1661*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.neg = (struct vunary_parameters) {
1662*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__scalar_x4,
1663*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1664*4bdc9457SAndroid Build Coastguard Worker       };
1665*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndne = (struct vunary_parameters) {
1666*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__scalar_libm_x1,
1667*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1668*4bdc9457SAndroid Build Coastguard Worker       };
1669*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndz = (struct vunary_parameters) {
1670*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__scalar_libm_x1,
1671*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1672*4bdc9457SAndroid Build Coastguard Worker       };
1673*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndu = (struct vunary_parameters) {
1674*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__scalar_libm_x1,
1675*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1676*4bdc9457SAndroid Build Coastguard Worker       };
1677*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndd = (struct vunary_parameters) {
1678*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__scalar_libm_x1,
1679*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1680*4bdc9457SAndroid Build Coastguard Worker       };
1681*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
1682*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
1683*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
1684*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 2,
1685*4bdc9457SAndroid Build Coastguard Worker       };
1686*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqr = (struct vunary_parameters) {
1687*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__scalar_x4,
1688*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1689*4bdc9457SAndroid Build Coastguard Worker       };
1690*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqrt = (struct vunary_parameters) {
1691*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1,
1692*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
1693*4bdc9457SAndroid Build Coastguard Worker       };
1694*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {  // F32 PReLU entry: scalar kernel plus its tile sizes; members not named here are zero-initialized.
1695*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
1696*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 4,  // NOTE(review): kernel name ends in "2x4" but row_tile is 4, not 2 — confirm this value is intended.
1697*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,  // Matches the trailing 4 of the "2x4" kernel name suffix.
1698*4bdc9457SAndroid Build Coastguard Worker       };
1699*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
1700*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4_acc2,
1701*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_expminus_scalar_rr2_p5_params,
1702*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1703*4bdc9457SAndroid Build Coastguard Worker       };
1704*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__scalar;
1705*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
1706*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__scalar_x8,
1707*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__scalar_x8,
1708*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__scalar_x8,
1709*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1710*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1711*4bdc9457SAndroid Build Coastguard Worker       };
1712*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
1713*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__scalar_x2,
1714*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__scalar_x2,
1715*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__scalar_x2,
1716*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1717*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 2,
1718*4bdc9457SAndroid Build Coastguard Worker       };
1719*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
1720*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__scalar_x8,
1721*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__scalar_x8,
1722*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__scalar_x8,
1723*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1724*4bdc9457SAndroid Build Coastguard Worker       };
1725*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
1726*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__scalar_x8,
1727*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__scalar_x8,
1728*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__scalar_x8,
1729*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1730*4bdc9457SAndroid Build Coastguard Worker       };
1731*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
1732*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__scalar_x8,
1733*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__scalar_x8,
1734*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__scalar_x8,
1735*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1736*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1737*4bdc9457SAndroid Build Coastguard Worker       };
1738*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
1739*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__scalar_x8,
1740*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__scalar_x8,
1741*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__scalar_x8,
1742*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
1743*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1744*4bdc9457SAndroid Build Coastguard Worker       };
1745*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
1746*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__scalar_x8,
1747*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
1748*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
1749*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1750*4bdc9457SAndroid Build Coastguard Worker       };
1751*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
1752*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x,
1753*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_scalar_params,
1754*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
1755*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
1756*4bdc9457SAndroid Build Coastguard Worker       };
1757*4bdc9457SAndroid Build Coastguard Worker       #ifndef XNN_NO_NCHW_OPERATORS
1758*4bdc9457SAndroid Build Coastguard Worker         init_flags |= XNN_INIT_FLAG_CHW_OPT;
1759*4bdc9457SAndroid Build Coastguard Worker 
1760*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm = (struct spmm_parameters) {
1761*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x1__scalar,
1762*4bdc9457SAndroid Build Coastguard Worker           .mr = 8,
1763*4bdc9457SAndroid Build Coastguard Worker           .nr = 1,
1764*4bdc9457SAndroid Build Coastguard Worker         };
1765*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm2 = (struct spmm_parameters) {
1766*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x2__scalar,
1767*4bdc9457SAndroid Build Coastguard Worker           .mr = 8,
1768*4bdc9457SAndroid Build Coastguard Worker           .nr = 2,
1769*4bdc9457SAndroid Build Coastguard Worker         };
1770*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm4 = (struct spmm_parameters) {
1771*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x4__scalar,
1772*4bdc9457SAndroid Build Coastguard Worker           .mr = 8,
1773*4bdc9457SAndroid Build Coastguard Worker           .nr = 4,
1774*4bdc9457SAndroid Build Coastguard Worker         };
1775*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
1776*4bdc9457SAndroid Build Coastguard Worker           .ukernel_with_symm_padding =
1777*4bdc9457SAndroid Build Coastguard Worker             (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
1778*4bdc9457SAndroid Build Coastguard Worker           .output_channel_tile = 4,
1779*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
1780*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 1,
1781*4bdc9457SAndroid Build Coastguard Worker         };
1782*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
1783*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1,
1784*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 1,
1785*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 4,
1786*4bdc9457SAndroid Build Coastguard Worker         };
1787*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
1788*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2,
1789*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 1,
1790*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
1791*4bdc9457SAndroid Build Coastguard Worker         };
1792*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
1793*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2,
1794*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 1,
1795*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
1796*4bdc9457SAndroid Build Coastguard Worker         };
1797*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
1798*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2,
1799*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 1,
1800*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
1801*4bdc9457SAndroid Build Coastguard Worker         };
1802*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
1803*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__scalar_x1,
1804*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 1,
1805*4bdc9457SAndroid Build Coastguard Worker         };
1806*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
1807*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__scalar_p4,
1808*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 1,
1809*4bdc9457SAndroid Build Coastguard Worker           .pixel_tile = 4,
1810*4bdc9457SAndroid Build Coastguard Worker         };
1811*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_NO_NCHW_OPERATORS
1812*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_F32_OPERATORS
1813*4bdc9457SAndroid Build Coastguard Worker 
1814*4bdc9457SAndroid Build Coastguard Worker     /*************************** VCVT AArch32 Pre-NEON micro-kernels ***************************/
1815*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_VCVT_OPERATORS
1816*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_VCVT;
1817*4bdc9457SAndroid Build Coastguard Worker 
1818*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
1819*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__scalar_x4,
1820*4bdc9457SAndroid Build Coastguard Worker         .init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params,
1821*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1822*4bdc9457SAndroid Build Coastguard Worker       };
1823*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
1824*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2,
1825*4bdc9457SAndroid Build Coastguard Worker         .init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_fabsf_params,
1826*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 2,
1827*4bdc9457SAndroid Build Coastguard Worker       };
1828*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
1829*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__scalar_imagic_x4,
1830*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_imagic_params,
1831*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1832*4bdc9457SAndroid Build Coastguard Worker       };
1833*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
1834*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__scalar_imagic_x4,
1835*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_imagic_params,
1836*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1837*4bdc9457SAndroid Build Coastguard Worker       };
1838*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
1839*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__armsimd32_x8,
1840*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_scalar_params,
1841*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1842*4bdc9457SAndroid Build Coastguard Worker       };
1843*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
1844*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__scalar_x4,
1845*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params,
1846*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1847*4bdc9457SAndroid Build Coastguard Worker       };
1848*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
1849*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__armsimd32_x8,
1850*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_scalar_params,
1851*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
1852*4bdc9457SAndroid Build Coastguard Worker       };
1853*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
1854*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__scalar_x4,
1855*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params,
1856*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
1857*4bdc9457SAndroid Build Coastguard Worker       };
1858*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_VCVT_OPERATORS
1859*4bdc9457SAndroid Build Coastguard Worker 
1860*4bdc9457SAndroid Build Coastguard Worker     /**************************** X32 AArch32 Pre-NEON micro-kernels ****************************/
1861*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_X32_OPERATORS
1862*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_X32;
1863*4bdc9457SAndroid Build Coastguard Worker 
1864*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
1865*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.zip = (struct zip_parameters) {
1866*4bdc9457SAndroid Build Coastguard Worker         .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
1867*4bdc9457SAndroid Build Coastguard Worker         .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
1868*4bdc9457SAndroid Build Coastguard Worker         .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
1869*4bdc9457SAndroid Build Coastguard Worker         .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
1870*4bdc9457SAndroid Build Coastguard Worker       };
1871*4bdc9457SAndroid Build Coastguard Worker 
1872*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x32.transpose = (struct transpose_parameters) {
1873*4bdc9457SAndroid Build Coastguard Worker         .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__2x4_scalar_int,
1874*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
1875*4bdc9457SAndroid Build Coastguard Worker       };
1876*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_X32_OPERATORS
1877*4bdc9457SAndroid Build Coastguard Worker 
1878*4bdc9457SAndroid Build Coastguard Worker     /**************************** XX AArch32 Pre-NEON micro-kernels ****************************/
1879*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_XX_OPERATORS
1880*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_XX;
1881*4bdc9457SAndroid Build Coastguard Worker 
1882*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
1883*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.fill = (struct fill_parameters) {
1884*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__scalar_x16,
1885*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 1,
1886*4bdc9457SAndroid Build Coastguard Worker       };
1887*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.pad = (struct pad_parameters) {
1888*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__scalar,
1889*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 1,
1890*4bdc9457SAndroid Build Coastguard Worker       };
1891*4bdc9457SAndroid Build Coastguard Worker       xnn_params.xx.transpose = (struct transpose_parameters) {
1892*4bdc9457SAndroid Build Coastguard Worker         .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
1893*4bdc9457SAndroid Build Coastguard Worker         .tile_size = 32,
1894*4bdc9457SAndroid Build Coastguard Worker       };
1895*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_XX_OPERATORS
1896*4bdc9457SAndroid Build Coastguard Worker   }
1897*4bdc9457SAndroid Build Coastguard Worker 
1898*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_ARM64
1899*4bdc9457SAndroid Build Coastguard Worker 
1900*4bdc9457SAndroid Build Coastguard Worker   /**************************** QC8 AArch64 micro-kernels ****************************/
1901*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QC8_OPERATORS
1902*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QC8;
1903*4bdc9457SAndroid Build Coastguard Worker 
1904*4bdc9457SAndroid Build Coastguard Worker     #if XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
1905*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
1906*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
1907*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
1908*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128);
1909*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot);
1910*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128);
1911*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot);
1912*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1913*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.mr = 4;
1914*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.nr = 16;
1915*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.log2_kr = 2;
1916*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
1917*4bdc9457SAndroid Build Coastguard Worker         } else {
1918*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal);
1919*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal);
1920*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal);
1921*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal);
1922*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1923*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.mr = 2;
1924*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.nr = 8;
1925*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_kr = 3;
1926*4bdc9457SAndroid Build Coastguard Worker         }
1927*4bdc9457SAndroid Build Coastguard Worker       #else  // !XNN_ENABLE_ASSEMBLY
1928*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
1929*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
1930*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot);
1931*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot);
1932*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot);
1933*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot);
1934*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1935*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.mr = 4;
1936*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.nr = 16;
1937*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.log2_kr = 2;
1938*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
1939*4bdc9457SAndroid Build Coastguard Worker         } else {
1940*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
1941*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
1942*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
1943*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
1944*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1945*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.mr = 2;
1946*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.nr = 8;
1947*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_kr = 1;
1948*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_sr = 2;
1949*4bdc9457SAndroid Build Coastguard Worker         }
1950*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
1951*4bdc9457SAndroid Build Coastguard Worker     #else  // !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
1952*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
1953*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
1954*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
1955*4bdc9457SAndroid Build Coastguard Worker             switch (cpuinfo_get_core(0)->uarch) {
1956*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
1957*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55);
1958*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55);
1959*4bdc9457SAndroid Build Coastguard Worker                 break;
1960*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_x1:
1961*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a78:
1962*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128);
1963*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128);
1964*4bdc9457SAndroid Build Coastguard Worker                 break;
1965*4bdc9457SAndroid Build Coastguard Worker               default:
1966*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64);
1967*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64);
1968*4bdc9457SAndroid Build Coastguard Worker                 break;
1969*4bdc9457SAndroid Build Coastguard Worker             }
1970*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot);
1971*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot);
1972*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1973*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.mr = 4;
1974*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.nr = 16;
1975*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.log2_kr = 2;
1976*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
1977*4bdc9457SAndroid Build Coastguard Worker         } else {
1978*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_core(0)->uarch) {
1979*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a35:
1980*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_kryo:
1981*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64);
1982*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64);
1983*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane);
1984*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane);
1985*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1986*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
1987*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 16;
1988*4bdc9457SAndroid Build Coastguard Worker               break;
1989*4bdc9457SAndroid Build Coastguard Worker 
1990*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
1991*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
1992*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
1993*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
1994*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane);
1995*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane);
1996*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
1997*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 4;
1998*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 16;
1999*4bdc9457SAndroid Build Coastguard Worker               break;
2000*4bdc9457SAndroid Build Coastguard Worker 
2001*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a72:
2002*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a73:
2003*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm);
2004*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm);
2005*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm);
2006*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm);
2007*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2008*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 2;
2009*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
2010*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.log2_kr = 3;
2011*4bdc9457SAndroid Build Coastguard Worker               break;
2012*4bdc9457SAndroid Build Coastguard Worker 
2013*4bdc9457SAndroid Build Coastguard Worker             default:
2014*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal);
2015*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal);
2016*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal);
2017*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal);
2018*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2019*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.mr = 2;
2020*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.nr = 8;
2021*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qc8.gemm.log2_kr = 3;
2022*4bdc9457SAndroid Build Coastguard Worker               break;
2023*4bdc9457SAndroid Build Coastguard Worker           }
2024*4bdc9457SAndroid Build Coastguard Worker         }
2025*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
2026*4bdc9457SAndroid Build Coastguard Worker         {
2027*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
2028*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.qc8.gemm.mr;
2029*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.qc8.gemm.nr;
2030*4bdc9457SAndroid Build Coastguard Worker           const uint32_t log2_kr = xnn_params.qc8.gemm.log2_kr;
2031*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
2032*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
2033*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
2034*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
2035*4bdc9457SAndroid Build Coastguard Worker               break;
2036*4bdc9457SAndroid Build Coastguard Worker             }
2037*4bdc9457SAndroid Build Coastguard Worker 
2038*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
2039*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
2040*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
2041*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 2 && nr == 8 && log2_kr == 3) {
2042*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2043*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2044*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2045*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2046*4bdc9457SAndroid Build Coastguard Worker                 }
2047*4bdc9457SAndroid Build Coastguard Worker                 break;
2048*4bdc9457SAndroid Build Coastguard Worker 
2049*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
2050*4bdc9457SAndroid Build Coastguard Worker                 #if XNN_ENABLE_ARM_DOTPROD
2051*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 4 && nr == 16 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
2052*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2053*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2054*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot;
2055*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot;
2056*4bdc9457SAndroid Build Coastguard Worker                   }
2057*4bdc9457SAndroid Build Coastguard Worker                 #endif  // XNN_ENABLE_ARM_DOTPROD
2058*4bdc9457SAndroid Build Coastguard Worker                 break;
2059*4bdc9457SAndroid Build Coastguard Worker               default:
2060*4bdc9457SAndroid Build Coastguard Worker                 break;
2061*4bdc9457SAndroid Build Coastguard Worker             }
2062*4bdc9457SAndroid Build Coastguard Worker           }
2063*4bdc9457SAndroid Build Coastguard Worker         }
2064*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
2065*4bdc9457SAndroid Build Coastguard Worker       #else  // !XNN_ENABLE_ASSEMBLY
2066*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2067*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
2068*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot);
2069*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot);
2070*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c4__neondot);
2071*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot);
2072*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2073*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.mr = 4;
2074*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.nr = 16;
2075*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qc8.gemm.log2_kr = 2;
2076*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
2077*4bdc9457SAndroid Build Coastguard Worker         } else {
2078*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
2079*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal);
2080*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
2081*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal);
2082*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2083*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.mr = 2;
2084*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.nr = 8;
2085*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_kr = 1;
2086*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qc8.gemm.log2_sr = 2;
2087*4bdc9457SAndroid Build Coastguard Worker         }
2088*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
2089*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
2090*4bdc9457SAndroid Build Coastguard Worker 
2091*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128;
2092*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2093*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].channel_tile = 16;
2094*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].primary_tile = 3;
2095*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mla8_ld64;
2096*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2097*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].channel_tile = 16;
2098*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].primary_tile = 9;
2099*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mla8_ld64;
2100*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_neonv8_params;
2101*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].channel_tile = 16;
2102*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].primary_tile = 25;
2103*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QC8_OPERATORS
2104*4bdc9457SAndroid Build Coastguard Worker 
2105*4bdc9457SAndroid Build Coastguard Worker   /**************************** QS8 AArch64 micro-kernels ****************************/
2106*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
2107*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QS8;
2108*4bdc9457SAndroid Build Coastguard Worker 
2109*4bdc9457SAndroid Build Coastguard Worker     #if XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
2110*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
2111*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2112*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
2113*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2114*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2115*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2116*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2117*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2118*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.mr = 4;
2119*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.nr = 16;
2120*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.log2_kr = 2;
2121*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
2122*4bdc9457SAndroid Build Coastguard Worker         } else {
2123*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal);
2124*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal);
2125*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal);
2126*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal);
2127*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2128*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.mr = 2;
2129*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.nr = 8;
2130*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_kr = 3;
2131*4bdc9457SAndroid Build Coastguard Worker         }
2132*4bdc9457SAndroid Build Coastguard Worker       #else  // !XNN_ENABLE_ASSEMBLY
2133*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2134*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
2135*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot);
2136*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2137*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot);
2138*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2139*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2140*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.mr = 4;
2141*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.nr = 16;
2142*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.log2_kr = 2;
2143*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
2144*4bdc9457SAndroid Build Coastguard Worker         } else {
2145*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
2146*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
2147*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
2148*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
2149*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2150*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.mr = 2;
2151*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.nr = 8;
2152*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_kr = 1;
2153*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_sr = 2;
2154*4bdc9457SAndroid Build Coastguard Worker         }
2155*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
2156*4bdc9457SAndroid Build Coastguard Worker     #else  // !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
2157*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
2158*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2159*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
2160*4bdc9457SAndroid Build Coastguard Worker             switch (cpuinfo_get_core(0)->uarch) {
2161*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
2162*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55);
2163*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55);
2164*4bdc9457SAndroid Build Coastguard Worker                 break;
2165*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_x1:
2166*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a78:
2167*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2168*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2169*4bdc9457SAndroid Build Coastguard Worker                 break;
2170*4bdc9457SAndroid Build Coastguard Worker               default:
2171*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64);
2172*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64);
2173*4bdc9457SAndroid Build Coastguard Worker                 break;
2174*4bdc9457SAndroid Build Coastguard Worker             }
2175*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2176*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2177*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2178*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.mr = 4;
2179*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.nr = 16;
2180*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.log2_kr = 2;
2181*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
2182*4bdc9457SAndroid Build Coastguard Worker         } else {
2183*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_core(0)->uarch) {
2184*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a35:
2185*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_kryo:
2186*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64);
2187*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64);
2188*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2189*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2190*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2191*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
2192*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 16;
2193*4bdc9457SAndroid Build Coastguard Worker               break;
2194*4bdc9457SAndroid Build Coastguard Worker 
2195*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
2196*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
2197*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
2198*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
2199*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2200*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2201*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2202*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 4;
2203*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 16;
2204*4bdc9457SAndroid Build Coastguard Worker               break;
2205*4bdc9457SAndroid Build Coastguard Worker 
2206*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a72:
2207*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a73:
2208*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm);
2209*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm);
2210*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm);
2211*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm);
2212*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2213*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 2;
2214*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
2215*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.log2_kr = 3;
2216*4bdc9457SAndroid Build Coastguard Worker               break;
2217*4bdc9457SAndroid Build Coastguard Worker 
2218*4bdc9457SAndroid Build Coastguard Worker             default:
2219*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal);
2220*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal);
2221*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal);
2222*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal);
2223*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2224*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.mr = 2;
2225*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.nr = 8;
2226*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qs8.gemm.log2_kr = 3;
2227*4bdc9457SAndroid Build Coastguard Worker               break;
2228*4bdc9457SAndroid Build Coastguard Worker           }
2229*4bdc9457SAndroid Build Coastguard Worker         }
2230*4bdc9457SAndroid Build Coastguard Worker         #if XNN_MAX_UARCH_TYPES > 1
2231*4bdc9457SAndroid Build Coastguard Worker         {
2232*4bdc9457SAndroid Build Coastguard Worker           /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
2233*4bdc9457SAndroid Build Coastguard Worker           const uint32_t mr = xnn_params.qs8.gemm.mr;
2234*4bdc9457SAndroid Build Coastguard Worker           const uint32_t nr = xnn_params.qs8.gemm.nr;
2235*4bdc9457SAndroid Build Coastguard Worker           const uint32_t log2_kr = xnn_params.qs8.gemm.log2_kr;
2236*4bdc9457SAndroid Build Coastguard Worker           for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
2237*4bdc9457SAndroid Build Coastguard Worker             const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
2238*4bdc9457SAndroid Build Coastguard Worker             if (uarch_info == NULL) {
2239*4bdc9457SAndroid Build Coastguard Worker               /* No more microarchitectures in the system */
2240*4bdc9457SAndroid Build Coastguard Worker               break;
2241*4bdc9457SAndroid Build Coastguard Worker             }
2242*4bdc9457SAndroid Build Coastguard Worker 
2243*4bdc9457SAndroid Build Coastguard Worker             switch (uarch_info->uarch) {
2244*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a53:
2245*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55r0:
2246*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 2 && nr == 8 && log2_kr == 3) {
2247*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2248*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2249*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2250*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53;
2251*4bdc9457SAndroid Build Coastguard Worker                 }
2252*4bdc9457SAndroid Build Coastguard Worker                 break;
2253*4bdc9457SAndroid Build Coastguard Worker 
2254*4bdc9457SAndroid Build Coastguard Worker               case cpuinfo_uarch_cortex_a55:
2255*4bdc9457SAndroid Build Coastguard Worker                 #if XNN_ENABLE_ARM_DOTPROD
2256*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 4 && nr == 16 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
2257*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2258*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2259*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot;
2260*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot;
2261*4bdc9457SAndroid Build Coastguard Worker                   }
2262*4bdc9457SAndroid Build Coastguard Worker                 #endif  // XNN_ENABLE_ARM_DOTPROD
2263*4bdc9457SAndroid Build Coastguard Worker                 break;
2264*4bdc9457SAndroid Build Coastguard Worker               default:
2265*4bdc9457SAndroid Build Coastguard Worker                 break;
2266*4bdc9457SAndroid Build Coastguard Worker             }
2267*4bdc9457SAndroid Build Coastguard Worker           }
2268*4bdc9457SAndroid Build Coastguard Worker         }
2269*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_MAX_UARCH_TYPES > 1
2270*4bdc9457SAndroid Build Coastguard Worker       #else  // !XNN_ENABLE_ASSEMBLY
2271*4bdc9457SAndroid Build Coastguard Worker         if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2272*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_ARM_DOTPROD
2273*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot);
2274*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2275*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot);
2276*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2277*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2278*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.mr = 4;
2279*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.nr = 16;
2280*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qs8.gemm.log2_kr = 2;
2281*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_ENABLE_ARM_DOTPROD
2282*4bdc9457SAndroid Build Coastguard Worker         } else {
2283*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
2284*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal);
2285*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
2286*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal);
2287*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2288*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.mr = 2;
2289*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.nr = 8;
2290*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_kr = 1;
2291*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qs8.gemm.log2_sr = 2;
2292*4bdc9457SAndroid Build Coastguard Worker         }
2293*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_ENABLE_ASSEMBLY
2294*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
2295*4bdc9457SAndroid Build Coastguard Worker 
2296*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64;
2297*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2298*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].channel_tile = 16;
2299*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].primary_tile = 9;
2300*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64;
2301*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_rndnu_neon_params;
2302*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].channel_tile = 16;
2303*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].primary_tile = 25;
2304*4bdc9457SAndroid Build Coastguard Worker 
2305*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
2306*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8,
2307*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8,
2308*4bdc9457SAndroid Build Coastguard Worker       .init.qs8 = xnn_init_qs8_avgpool_minmax_rndnu_neon_params,
2309*4bdc9457SAndroid Build Coastguard Worker       .update.qs8 = xnn_update_qs8_avgpool_minmax_rndnu_neon_params,
2310*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
2311*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
2312*4bdc9457SAndroid Build Coastguard Worker     };
2313*4bdc9457SAndroid Build Coastguard Worker 
2314*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vadd = (struct vbinary_parameters) {
2315*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
2316*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32,
2317*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32,
2318*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_add = xnn_init_qs8_add_minmax_neon_params,
2319*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
2320*4bdc9457SAndroid Build Coastguard Worker     };
2321*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vmul = (struct vbinary_parameters) {
2322*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x16,
2323*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
2324*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
2325*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_mul = xnn_init_qs8_mul_minmax_rndnu_neon_params,
2326*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
2327*4bdc9457SAndroid Build Coastguard Worker     };
2328*4bdc9457SAndroid Build Coastguard Worker 
2329*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.lrelu = (struct vunary_parameters) {
2330*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__neon_x32,
2331*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_lrelu = xnn_init_qs8_lrelu_neon_params,
2332*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
2333*4bdc9457SAndroid Build Coastguard Worker     };
2334*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS
2335*4bdc9457SAndroid Build Coastguard Worker 
2336*4bdc9457SAndroid Build Coastguard Worker   /**************************** QU8 AArch64 micro-kernels ****************************/
2337*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QU8_OPERATORS
2338*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QU8;
2339*4bdc9457SAndroid Build Coastguard Worker 
2340*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ENABLE_ASSEMBLY
2341*4bdc9457SAndroid Build Coastguard Worker       if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2342*4bdc9457SAndroid Build Coastguard Worker         #if XNN_ENABLE_ARM_DOTPROD
2343*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_core(0)->uarch) {
2344*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55:
2345*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55);
2346*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55);
2347*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2348*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2349*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2350*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
2351*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 16;
2352*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.log2_kr = 2;
2353*4bdc9457SAndroid Build Coastguard Worker               break;
2354*4bdc9457SAndroid Build Coastguard Worker             default:
2355*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2356*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128);
2357*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2358*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2359*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2360*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.mr = 4;
2361*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.nr = 16;
2362*4bdc9457SAndroid Build Coastguard Worker               xnn_params.qu8.gemm.log2_kr = 2;
2363*4bdc9457SAndroid Build Coastguard Worker               break;
2364*4bdc9457SAndroid Build Coastguard Worker           }
2365*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_ENABLE_ARM_DOTPROD
2366*4bdc9457SAndroid Build Coastguard Worker       } else {
2367*4bdc9457SAndroid Build Coastguard Worker         switch (cpuinfo_get_core(0)->uarch) {
2368*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a53:
2369*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a55r0:
2370*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_kryo:
2371*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53);
2372*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53);
2373*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2374*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2375*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2376*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.mr = 4;
2377*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.nr = 16;
2378*4bdc9457SAndroid Build Coastguard Worker             break;
2379*4bdc9457SAndroid Build Coastguard Worker 
2380*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a57:
2381*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a72:
2382*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a73:
2383*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a75:
2384*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a76:
2385*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_exynos_m1:
2386*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_exynos_m2:
2387*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_exynos_m3:
2388*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_exynos_m4:
2389*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75);
2390*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75);
2391*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2392*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2393*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2394*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.mr = 4;
2395*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.nr = 16;
2396*4bdc9457SAndroid Build Coastguard Worker             break;
2397*4bdc9457SAndroid Build Coastguard Worker 
2398*4bdc9457SAndroid Build Coastguard Worker           default:
2399*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75);
2400*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75);
2401*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2402*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2403*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2404*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.mr = 4;
2405*4bdc9457SAndroid Build Coastguard Worker             xnn_params.qu8.gemm.nr = 16;
2406*4bdc9457SAndroid Build Coastguard Worker             break;
2407*4bdc9457SAndroid Build Coastguard Worker         }
2408*4bdc9457SAndroid Build Coastguard Worker       }
2409*4bdc9457SAndroid Build Coastguard Worker       #if XNN_MAX_UARCH_TYPES > 1
2410*4bdc9457SAndroid Build Coastguard Worker       {
2411*4bdc9457SAndroid Build Coastguard Worker         /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
        /*
         * Snapshot of the QU8 GEMM tile parameters stored in xnn_params.qu8.gemm by the selection code above.
         * A per-microarchitecture entry is replaced only when this snapshot equals the exact values tested
         * in the switch below. Only the MR=4 GEMM/IGEMM entries are written here; the MR=1 entries are not touched.
         */
2412*4bdc9457SAndroid Build Coastguard Worker         const uint32_t mr = xnn_params.qu8.gemm.mr;
2413*4bdc9457SAndroid Build Coastguard Worker         const uint32_t nr = xnn_params.qu8.gemm.nr;
2414*4bdc9457SAndroid Build Coastguard Worker         const uint32_t log2_kr = xnn_params.qu8.gemm.log2_kr;
        /* Index 0 is skipped: the loop starts at 1, so function[0] keeps whatever the selection code above stored. */
2415*4bdc9457SAndroid Build Coastguard Worker         for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
2416*4bdc9457SAndroid Build Coastguard Worker           const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
2417*4bdc9457SAndroid Build Coastguard Worker           if (uarch_info == NULL) {
2418*4bdc9457SAndroid Build Coastguard Worker             /* No more microarchitectures in the system */
2419*4bdc9457SAndroid Build Coastguard Worker             break;
2420*4bdc9457SAndroid Build Coastguard Worker           }
2421*4bdc9457SAndroid Build Coastguard Worker 
2422*4bdc9457SAndroid Build Coastguard Worker           switch (uarch_info->uarch) {
2423*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
2424*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
              /*
               * Override with the cortex_a53-tuned 4x16 mlal_lane kernels when the snapshot is 4x16 with log2_kr == 0.
               * NOTE(review): the non-dot-product selection branches above never assign log2_kr, so this test
               * presumably relies on xnn_params starting out zeroed - confirm.
               */
2425*4bdc9457SAndroid Build Coastguard Worker               if (mr == 4 && nr == 16 && log2_kr == 0) {
2426*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53;
2427*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53;
2428*4bdc9457SAndroid Build Coastguard Worker               }
2429*4bdc9457SAndroid Build Coastguard Worker               break;
2430*4bdc9457SAndroid Build Coastguard Worker 
2431*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55:
              /*
               * Override with the cortex_a55-tuned 4x16c4 neondot kernels, but only in builds with XNN_ENABLE_ARM_DOTPROD,
               * when the snapshot is 4x16 with log2_kr == 2 and cpuinfo_has_arm_neon_dot() is true.
               * Without XNN_ENABLE_ARM_DOTPROD this case does nothing.
               */
2432*4bdc9457SAndroid Build Coastguard Worker               #if XNN_ENABLE_ARM_DOTPROD
2433*4bdc9457SAndroid Build Coastguard Worker                 if (mr == 4 && nr == 16 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
2434*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2435*4bdc9457SAndroid Build Coastguard Worker                   xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55;
2436*4bdc9457SAndroid Build Coastguard Worker                 }
2437*4bdc9457SAndroid Build Coastguard Worker               #endif  // XNN_ENABLE_ARM_DOTPROD
2438*4bdc9457SAndroid Build Coastguard Worker               break;
2439*4bdc9457SAndroid Build Coastguard Worker             default:
              /* Any other microarchitecture: function[i] is left as it was before this loop. */
2440*4bdc9457SAndroid Build Coastguard Worker               break;
2441*4bdc9457SAndroid Build Coastguard Worker           }
2442*4bdc9457SAndroid Build Coastguard Worker         }
2443*4bdc9457SAndroid Build Coastguard Worker       }
2444*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_MAX_UARCH_TYPES > 1
2445*4bdc9457SAndroid Build Coastguard Worker     #else  // !XNN_ENABLE_ASSEMBLY
2446*4bdc9457SAndroid Build Coastguard Worker       if (XNN_ENABLE_ARM_DOTPROD && cpuinfo_has_arm_neon_dot()) {
2447*4bdc9457SAndroid Build Coastguard Worker         #if XNN_ENABLE_ARM_DOTPROD
2448*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot);
2449*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot);
2450*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot);
2451*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot);
2452*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2453*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.mr = 4;
2454*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.nr = 16;
2455*4bdc9457SAndroid Build Coastguard Worker           xnn_params.qu8.gemm.log2_kr = 2;
2456*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_ENABLE_ARM_DOTPROD
2457*4bdc9457SAndroid Build Coastguard Worker       } else {
2458*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane);
2459*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane);
2460*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2461*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane);
2462*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2463*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.mr = 4;
2464*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.gemm.nr = 16;
2465*4bdc9457SAndroid Build Coastguard Worker       }
2466*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_ENABLE_ASSEMBLY
2467*4bdc9457SAndroid Build Coastguard Worker 
2468*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8;
2469*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2470*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].channel_tile = 16;
2471*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].primary_tile = 9;
2472*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8;
2473*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_rndnu_neon_params;
2474*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].channel_tile = 8;
2475*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].primary_tile = 25;
2476*4bdc9457SAndroid Build Coastguard Worker 
2477*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.avgpool = (struct avgpool_parameters) {
2478*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8,
2479*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8,
2480*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_neon_params,
2481*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
2482*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
2483*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
2484*4bdc9457SAndroid Build Coastguard Worker     };
2485*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
2486*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8,
2487*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c8,
2488*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_rndnu_neon_params,
2489*4bdc9457SAndroid Build Coastguard Worker       .update.qu8 = xnn_update_qu8_avgpool_minmax_rndnu_neon_params,
2490*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
2491*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
2492*4bdc9457SAndroid Build Coastguard Worker     };
2493*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vadd = (struct vbinary_parameters) {
      // QU8 element-wise addition: the same vaddc kernel is registered for both the opc and ropc slots.
2494*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
2495*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32,
2496*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32,
2497*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_add = xnn_init_qu8_add_minmax_neon_params,
      // NOTE(review): the kernels above are the *_x32 variants, and the QS8 vadd entry in this file registers
      // its *_x32 kernels with element_tile = 32. Confirm whether 8 is intentional here or should also be 32.
2498*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
2499*4bdc9457SAndroid Build Coastguard Worker     };
2500*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vmul = (struct vbinary_parameters) {
2501*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x16,
2502*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
2503*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16,
2504*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_mul = xnn_init_qu8_mul_minmax_rndnu_neon_params,
2505*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
2506*4bdc9457SAndroid Build Coastguard Worker     };
2507*4bdc9457SAndroid Build Coastguard Worker 
2508*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.lrelu = (struct vunary_parameters) {
2509*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__neon_x32,
2510*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_lrelu = xnn_init_qu8_lrelu_neon_params,
2511*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
2512*4bdc9457SAndroid Build Coastguard Worker     };
2513*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QU8_OPERATORS
2514*4bdc9457SAndroid Build Coastguard Worker 
2515*4bdc9457SAndroid Build Coastguard Worker   /**************************** S8 AArch64 micro-kernels ****************************/
2516*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_S8_OPERATORS
    // Registers the S8 micro-kernels in xnn_params.s8; the whole section is compiled out when
    // XNN_NO_S8_OPERATORS is defined. Each compound-literal assignment overwrites the entire
    // struct, so members not named in an initializer end up zero.
2517*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_S8;
2518*4bdc9457SAndroid Build Coastguard Worker 
    // Clamp: NEON kernel; element_tile matches the _x64 suffix of the kernel name.
2519*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.clamp = (struct vunary_parameters) {
2520*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__neon_x64,
2521*4bdc9457SAndroid Build Coastguard Worker       .init.s8_minmax = xnn_init_s8_minmax_neon_params,
2522*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 64,
2523*4bdc9457SAndroid Build Coastguard Worker     };
    // Indirect bilinear: channel_tile matches the _c16 suffix of the kernel name.
2524*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
2525*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__neon_c16,
2526*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
2527*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 16,
2528*4bdc9457SAndroid Build Coastguard Worker     };
    // Max pooling: mr/qr match the "9p8x" part of the kernel name; shares the clamp's min/max init function.
2529*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.maxpool = (struct maxpool_parameters) {
2530*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__neon_c16,
2531*4bdc9457SAndroid Build Coastguard Worker       .init.s8 = xnn_init_s8_minmax_neon_params,
2532*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
2533*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
2534*4bdc9457SAndroid Build Coastguard Worker     };
2535*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_S8_OPERATORS
2536*4bdc9457SAndroid Build Coastguard Worker 
2537*4bdc9457SAndroid Build Coastguard Worker   /**************************** U8 AArch64 micro-kernels ****************************/
2538*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_U8_OPERATORS
    // Registers the U8 micro-kernels in xnn_params.u8; the whole section is compiled out when
    // XNN_NO_U8_OPERATORS is defined. Each compound-literal assignment overwrites the entire
    // struct, so members not named in an initializer end up zero.
2539*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_U8;
2540*4bdc9457SAndroid Build Coastguard Worker 
    // Clamp: NEON kernel; element_tile matches the _x64 suffix of the kernel name.
2541*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.clamp = (struct vunary_parameters) {
2542*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__neon_x64,
2543*4bdc9457SAndroid Build Coastguard Worker       .init.u8_minmax = xnn_init_u8_minmax_neon_params,
2544*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 64,
2545*4bdc9457SAndroid Build Coastguard Worker     };
    // Indirect bilinear: channel_tile matches the _c16 suffix of the kernel name.
2546*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
2547*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__neon_c16,
2548*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
2549*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 16,
2550*4bdc9457SAndroid Build Coastguard Worker     };
    // Max pooling: mr/qr match the "9p8x" part of the kernel name; shares the clamp's min/max init function.
2551*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.maxpool = (struct maxpool_parameters) {
2552*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16,
2553*4bdc9457SAndroid Build Coastguard Worker       .init.u8 = xnn_init_u8_minmax_neon_params,
2554*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
2555*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
2556*4bdc9457SAndroid Build Coastguard Worker     };
    // lut32norm is the only entry in this section that uses a __scalar kernel; rmax uses a NEON one.
2557*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
2558*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
2559*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_U8_OPERATORS
2560*4bdc9457SAndroid Build Coastguard Worker 
2561*4bdc9457SAndroid Build Coastguard Worker   /**************************** X8 AArch64 micro-kernels ****************************/
2562*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X8_OPERATORS
    // Registers the X8 micro-kernels (lookup table, zip, transpose) in xnn_params.x8; the whole
    // section is compiled out when XNN_NO_X8_OPERATORS is defined.
2563*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X8;
2564*4bdc9457SAndroid Build Coastguard Worker 
2565*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.lut = xnn_x8_lut_ukernel__neon_tbx128x4_x64;
    // Zip: fixed-arity kernels for 2, 3 and 4 inputs are cast to the zipc type; the xm kernel uses the separate zipv type.
2566*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.zip = (struct zip_parameters) {
2567*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
2568*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
2569*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
2570*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
2571*4bdc9457SAndroid Build Coastguard Worker     };
2572*4bdc9457SAndroid Build Coastguard Worker 
    // NOTE(review): the kernel name says 16x16 while tile_size is 32; presumably tile_size is a
    // separate blocking parameter rather than the kernel's own tile - confirm against its consumer.
2573*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.transpose = (struct transpose_parameters) {
2574*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon,
2575*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
2576*4bdc9457SAndroid Build Coastguard Worker     };
2577*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X8_OPERATORS
2578*4bdc9457SAndroid Build Coastguard Worker 
2579*4bdc9457SAndroid Build Coastguard Worker   /**************************** X16 AArch64 micro-kernels ****************************/
2580*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X16_OPERATORS
2581*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X16;
2582*4bdc9457SAndroid Build Coastguard Worker 
2583*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x16.transpose = (struct transpose_parameters) {
2584*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon,
2585*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
2586*4bdc9457SAndroid Build Coastguard Worker     };
2587*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X16_OPERATORS
2588*4bdc9457SAndroid Build Coastguard Worker 
2589*4bdc9457SAndroid Build Coastguard Worker   /**************************** F16 AArch64 micro-kernels ****************************/
2590*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F16_OPERATORS
2591*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ENABLE_ARM_FP16
2592*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_arm_neon_fp16_arith()) {
2593*4bdc9457SAndroid Build Coastguard Worker         init_flags |= XNN_INIT_FLAG_F16 | XNN_INIT_FLAG_F16_NATIVE;
2594*4bdc9457SAndroid Build Coastguard Worker 
2595*4bdc9457SAndroid Build Coastguard Worker         #if XNN_ENABLE_ASSEMBLY
2596*4bdc9457SAndroid Build Coastguard Worker           switch (cpuinfo_get_core(0)->uarch) {
2597*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55:
2598*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55);
2599*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55);
2600*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2601*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2602*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2603*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.mr = 6;
2604*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.nr = 16;
2605*4bdc9457SAndroid Build Coastguard Worker               break;
2606*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
2607*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a75:
2608*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55r0);
2609*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55r0);
2610*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2611*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2612*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2613*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.mr = 6;
2614*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.nr = 16;
2615*4bdc9457SAndroid Build Coastguard Worker               break;
2616*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m5:
2617*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld64);
2618*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld64);
2619*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2620*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2621*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2622*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.mr = 4;
2623*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.nr = 16;
2624*4bdc9457SAndroid Build Coastguard Worker               break;
2625*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_exynos_m4:
2626*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld64);
2627*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld64);
2628*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2629*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2630*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2631*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.mr = 6;
2632*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.nr = 16;
2633*4bdc9457SAndroid Build Coastguard Worker               break;
2634*4bdc9457SAndroid Build Coastguard Worker             default:
2635*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a76:
2636*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a77:
2637*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a78:
2638*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_x1:
2639*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75);
2640*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75);
2641*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2642*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld64);
2643*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2644*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.mr = 6;
2645*4bdc9457SAndroid Build Coastguard Worker               xnn_params.f16.gemm.nr = 16;
2646*4bdc9457SAndroid Build Coastguard Worker               break;
2647*4bdc9457SAndroid Build Coastguard Worker           }
2648*4bdc9457SAndroid Build Coastguard Worker 
2649*4bdc9457SAndroid Build Coastguard Worker           #if XNN_MAX_UARCH_TYPES > 1
2650*4bdc9457SAndroid Build Coastguard Worker           {
2651*4bdc9457SAndroid Build Coastguard Worker             /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
2652*4bdc9457SAndroid Build Coastguard Worker             const uint32_t mr = xnn_params.f16.gemm.mr;
2653*4bdc9457SAndroid Build Coastguard Worker             const uint32_t nr = xnn_params.f16.gemm.nr;
2654*4bdc9457SAndroid Build Coastguard Worker             for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
2655*4bdc9457SAndroid Build Coastguard Worker               const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
2656*4bdc9457SAndroid Build Coastguard Worker               if (uarch_info == NULL) {
2657*4bdc9457SAndroid Build Coastguard Worker                 /* No more microarchitectures in the system */
2658*4bdc9457SAndroid Build Coastguard Worker                 break;
2659*4bdc9457SAndroid Build Coastguard Worker               }
2660*4bdc9457SAndroid Build Coastguard Worker 
2661*4bdc9457SAndroid Build Coastguard Worker               switch (uarch_info->uarch) {
2662*4bdc9457SAndroid Build Coastguard Worker                 case cpuinfo_uarch_cortex_a55:
2663*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 6 && nr == 16) {
2664*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55;
2665*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55;
2666*4bdc9457SAndroid Build Coastguard Worker                   }
2667*4bdc9457SAndroid Build Coastguard Worker                   break;
2668*4bdc9457SAndroid Build Coastguard Worker                 case cpuinfo_uarch_cortex_a55r0:
2669*4bdc9457SAndroid Build Coastguard Worker                 case cpuinfo_uarch_cortex_a75:
2670*4bdc9457SAndroid Build Coastguard Worker                   if (mr == 6 && nr == 16) {
2671*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55r0;
2672*4bdc9457SAndroid Build Coastguard Worker                     xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55r0;
2673*4bdc9457SAndroid Build Coastguard Worker                   }
2674*4bdc9457SAndroid Build Coastguard Worker                   break;
2675*4bdc9457SAndroid Build Coastguard Worker                 default:
2676*4bdc9457SAndroid Build Coastguard Worker                   break;
2677*4bdc9457SAndroid Build Coastguard Worker               }
2678*4bdc9457SAndroid Build Coastguard Worker             }
2679*4bdc9457SAndroid Build Coastguard Worker           }
2680*4bdc9457SAndroid Build Coastguard Worker           #endif  // XNN_MAX_UARCH_TYPES > 1
2681*4bdc9457SAndroid Build Coastguard Worker         #else  // XNN_ENABLE_ASSEMBLY
2682*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64);
2683*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64);
2684*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64);
2685*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64);
2686*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_neon_params;
2687*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.mr = 6;
2688*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gemm.nr = 16;
2689*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_ENABLE_ASSEMBLY
2690*4bdc9457SAndroid Build Coastguard Worker 
2691*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith;
2692*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[0].init.f16 = xnn_init_f16_minmax_neon_params;
2693*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[0].channel_tile = 16;
2694*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[0].primary_tile = 3;
2695*4bdc9457SAndroid Build Coastguard Worker 
2696*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith;
2697*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[1].init.f16 = xnn_init_f16_minmax_neon_params;
2698*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[1].channel_tile = 16;
2699*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[1].primary_tile = 4;
2700*4bdc9457SAndroid Build Coastguard Worker 
2701*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith;
2702*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[2].init.f16 = xnn_init_f16_minmax_neon_params;
2703*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[2].channel_tile = 16;
2704*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[2].primary_tile = 9;
2705*4bdc9457SAndroid Build Coastguard Worker 
2706*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2;
2707*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[3].init.f16 = xnn_init_f16_minmax_neon_params;
2708*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[3].channel_tile = 8;
2709*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.dwconv[3].primary_tile = 25;
2710*4bdc9457SAndroid Build Coastguard Worker 
2711*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.avgpool = (struct avgpool_parameters) {
2712*4bdc9457SAndroid Build Coastguard Worker           .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f16_avgpool_minmax_ukernel_9x__neonfp16arith_c8,
2713*4bdc9457SAndroid Build Coastguard Worker           .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8,
2714*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_scaleminmax_neon_params,
2715*4bdc9457SAndroid Build Coastguard Worker           .primary_tile = 9,
2716*4bdc9457SAndroid Build Coastguard Worker           .incremental_tile = 8,
2717*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 8,
2718*4bdc9457SAndroid Build Coastguard Worker         };
2719*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.pavgpool = (struct pavgpool_parameters) {
2720*4bdc9457SAndroid Build Coastguard Worker           .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8,
2721*4bdc9457SAndroid Build Coastguard Worker           .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8,
2722*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_minmax_neon_params,
2723*4bdc9457SAndroid Build Coastguard Worker           .primary_tile = 9,
2724*4bdc9457SAndroid Build Coastguard Worker           .incremental_tile = 8,
2725*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 8,
2726*4bdc9457SAndroid Build Coastguard Worker         };
2727*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.gavgpool = (struct gavgpool_parameters) {
2728*4bdc9457SAndroid Build Coastguard Worker           .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c8,
2729*4bdc9457SAndroid Build Coastguard Worker           .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8,
2730*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_scaleminmax_neon_params,
2731*4bdc9457SAndroid Build Coastguard Worker           .update.f16 = xnn_update_f16_scaleminmax_neon_params,
2732*4bdc9457SAndroid Build Coastguard Worker           .row_tile = 7,
2733*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 8,
2734*4bdc9457SAndroid Build Coastguard Worker         };
2735*4bdc9457SAndroid Build Coastguard Worker 
2736*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.maxpool = (struct maxpool_parameters) {
2737*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_maxpool_ukernel_function) xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
2738*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_minmax_neon_params,
2739*4bdc9457SAndroid Build Coastguard Worker           .mr = 9,
2740*4bdc9457SAndroid Build Coastguard Worker           .qr = 8,
2741*4bdc9457SAndroid Build Coastguard Worker         };
2742*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.ibilinear = (struct ibilinear_parameters) {
2743*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_ibilinear_ukernel_function) xnn_f16_ibilinear_ukernel__neonfp16arith_c8,
2744*4bdc9457SAndroid Build Coastguard Worker           .pixel_tile = 1,
2745*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 8,
2746*4bdc9457SAndroid Build Coastguard Worker         };
2747*4bdc9457SAndroid Build Coastguard Worker 
2748*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.prelu = (struct prelu_parameters) {
2749*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_prelu_ukernel_function) xnn_f16_prelu_ukernel__neonfp16arith_2x16,
2750*4bdc9457SAndroid Build Coastguard Worker           .row_tile = 2,
2751*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 16,
2752*4bdc9457SAndroid Build Coastguard Worker         };
2753*4bdc9457SAndroid Build Coastguard Worker 
2754*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
2755*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40,
2756*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_expminus_neonfp16arith_rr2_p2_params,
2757*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 40,
2758*4bdc9457SAndroid Build Coastguard Worker         };
2759*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.rmax = (xnn_rmax_ukernel_function) xnn_f16_rmax_ukernel__neonfp16arith;
2760*4bdc9457SAndroid Build Coastguard Worker 
2761*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vadd = (struct vbinary_parameters) {
2762*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16,
2763*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vaddc_minmax_ukernel__neonfp16arith_x16,
2764*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vaddc_minmax_ukernel__neonfp16arith_x16,
2765*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2766*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2767*4bdc9457SAndroid Build Coastguard Worker         };
2768*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vdiv = (struct vbinary_parameters) {
2769*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8,
2770*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vdivc_minmax_ukernel__neonfp16arith_x8,
2771*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vrdivc_minmax_ukernel__neonfp16arith_x8,
2772*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2773*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
2774*4bdc9457SAndroid Build Coastguard Worker         };
2775*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vmax = (struct vbinary_parameters) {
2776*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmax_ukernel__neonfp16arith_x16,
2777*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmaxc_ukernel__neonfp16arith_x16,
2778*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmaxc_ukernel__neonfp16arith_x16,
2779*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2780*4bdc9457SAndroid Build Coastguard Worker         };
2781*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vmin = (struct vbinary_parameters) {
2782*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmin_ukernel__neonfp16arith_x16,
2783*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vminc_ukernel__neonfp16arith_x16,
2784*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vminc_ukernel__neonfp16arith_x16,
2785*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2786*4bdc9457SAndroid Build Coastguard Worker         };
2787*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vmul = (struct vbinary_parameters) {
2788*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16,
2789*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmulc_minmax_ukernel__neonfp16arith_x16,
2790*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmulc_minmax_ukernel__neonfp16arith_x16,
2791*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2792*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2793*4bdc9457SAndroid Build Coastguard Worker         };
2794*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vsub = (struct vbinary_parameters) {
2795*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16,
2796*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsubc_minmax_ukernel__neonfp16arith_x16,
2797*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vrsubc_minmax_ukernel__neonfp16arith_x16,
2798*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2799*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2800*4bdc9457SAndroid Build Coastguard Worker         };
2801*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vsqrdiff = (struct vbinary_parameters) {
2802*4bdc9457SAndroid Build Coastguard Worker           .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16,
2803*4bdc9457SAndroid Build Coastguard Worker           .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiffc_ukernel__neonfp16arith_x16,
2804*4bdc9457SAndroid Build Coastguard Worker           .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiffc_ukernel__neonfp16arith_x16,
2805*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2806*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2807*4bdc9457SAndroid Build Coastguard Worker         };
2808*4bdc9457SAndroid Build Coastguard Worker 
2809*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.vmulcaddc = (struct vmulcaddc_parameters) {
2810*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
2811*4bdc9457SAndroid Build Coastguard Worker           .init.f16 = xnn_init_f16_minmax_neon_params,
2812*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 8,
2813*4bdc9457SAndroid Build Coastguard Worker           .row_tile = 2,
2814*4bdc9457SAndroid Build Coastguard Worker         };
2815*4bdc9457SAndroid Build Coastguard Worker 
2816*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.abs = (struct vunary_parameters) {
2817*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vabs_ukernel__neonfp16arith_x16,
2818*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2819*4bdc9457SAndroid Build Coastguard Worker         };
2820*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.clamp = (struct vunary_parameters) {
2821*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vclamp_ukernel__neonfp16arith_x16,
2822*4bdc9457SAndroid Build Coastguard Worker           .init.f16_minmax = xnn_init_f16_minmax_neon_params,
2823*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2824*4bdc9457SAndroid Build Coastguard Worker         };
2825*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.elu = (struct vunary_parameters) {
2826*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16,
2827*4bdc9457SAndroid Build Coastguard Worker           .init.f16_elu = xnn_init_f16_elu_neonfp16arith_rr1_p3_params,
2828*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2829*4bdc9457SAndroid Build Coastguard Worker         };
2830*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.hswish = (struct vunary_parameters) {
2831*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vhswish_ukernel__neonfp16arith_x16,
2832*4bdc9457SAndroid Build Coastguard Worker           .init.f16_hswish = xnn_init_f16_hswish_neon_params,
2833*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2834*4bdc9457SAndroid Build Coastguard Worker         };
2835*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.lrelu = (struct vunary_parameters) {
2836*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vlrelu_ukernel__neonfp16arith_x16,
2837*4bdc9457SAndroid Build Coastguard Worker           .init.f16_lrelu = xnn_init_f16_lrelu_neon_params,
2838*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2839*4bdc9457SAndroid Build Coastguard Worker         };
2840*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.neg = (struct vunary_parameters) {
2841*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vneg_ukernel__neonfp16arith_x16,
2842*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2843*4bdc9457SAndroid Build Coastguard Worker         };
2844*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.rndne = (struct vunary_parameters) {
2845*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndne_ukernel__neonfp16arith_x16,
2846*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2847*4bdc9457SAndroid Build Coastguard Worker         };
2848*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.rndz = (struct vunary_parameters) {
2849*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndz_ukernel__neonfp16arith_x16,
2850*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2851*4bdc9457SAndroid Build Coastguard Worker         };
2852*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.rndu = (struct vunary_parameters) {
2853*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndu_ukernel__neonfp16arith_x16,
2854*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2855*4bdc9457SAndroid Build Coastguard Worker         };
2856*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.rndd = (struct vunary_parameters) {
2857*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndd_ukernel__neonfp16arith_x16,
2858*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2859*4bdc9457SAndroid Build Coastguard Worker         };
2860*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.sigmoid = (struct vunary_parameters) {
2861*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x40,
2862*4bdc9457SAndroid Build Coastguard Worker           .init.f16_sigmoid = xnn_init_f16_sigmoid_neonfp16arith_rr2_p2_params,
2863*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 40,
2864*4bdc9457SAndroid Build Coastguard Worker         };
2865*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.sqr = (struct vunary_parameters) {
2866*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsqr_ukernel__neonfp16arith_x16,
2867*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
2868*4bdc9457SAndroid Build Coastguard Worker         };
2869*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f16.sqrt = (struct vunary_parameters) {
2870*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsqrt_ukernel__neonfp16arith_sqrt_x8,
2871*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 8,
2872*4bdc9457SAndroid Build Coastguard Worker         };
2873*4bdc9457SAndroid Build Coastguard Worker 
2874*4bdc9457SAndroid Build Coastguard Worker         #ifndef XNN_NO_NCHW_OPERATORS
2875*4bdc9457SAndroid Build Coastguard Worker           init_flags |= XNN_INIT_FLAG_CHW_OPT;
2876*4bdc9457SAndroid Build Coastguard Worker 
2877*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.spmm = (struct spmm_parameters) {
2878*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_spmm_ukernel_function) xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith,
2879*4bdc9457SAndroid Build Coastguard Worker             .mr = 32,
2880*4bdc9457SAndroid Build Coastguard Worker             .nr = 1,
2881*4bdc9457SAndroid Build Coastguard Worker           };
2882*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
2883*4bdc9457SAndroid Build Coastguard Worker             .ukernel_with_symm_padding =
2884*4bdc9457SAndroid Build Coastguard Worker               (xnn_conv_hwc2chw_ukernel_function) xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2,
2885*4bdc9457SAndroid Build Coastguard Worker             .output_channel_tile = 4,
2886*4bdc9457SAndroid Build Coastguard Worker             .output_height_tile = 2,
2887*4bdc9457SAndroid Build Coastguard Worker             .output_width_tile = 2,
2888*4bdc9457SAndroid Build Coastguard Worker           };
2889*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
2890*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8,
2891*4bdc9457SAndroid Build Coastguard Worker             .output_width_tile = 8,
2892*4bdc9457SAndroid Build Coastguard Worker             .output_height_tile = 2,
2893*4bdc9457SAndroid Build Coastguard Worker           };
2894*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
2895*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4,
2896*4bdc9457SAndroid Build Coastguard Worker             .output_width_tile = 4,
2897*4bdc9457SAndroid Build Coastguard Worker             .output_height_tile = 1,
2898*4bdc9457SAndroid Build Coastguard Worker           };
2899*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
2900*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4,
2901*4bdc9457SAndroid Build Coastguard Worker             .output_width_tile = 4,
2902*4bdc9457SAndroid Build Coastguard Worker             .output_height_tile = 1,
2903*4bdc9457SAndroid Build Coastguard Worker           };
2904*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
2905*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4,
2906*4bdc9457SAndroid Build Coastguard Worker             .output_width_tile = 4,
2907*4bdc9457SAndroid Build Coastguard Worker             .output_height_tile = 1,
2908*4bdc9457SAndroid Build Coastguard Worker           };
2909*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.gavgpool_cw = (struct gavgpool_cw_parameters) {
2910*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x4,
2911*4bdc9457SAndroid Build Coastguard Worker             .channel_tile = 4,
2912*4bdc9457SAndroid Build Coastguard Worker           };
2913*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f16.ibilinear_chw = (struct ibilinear_chw_parameters) {
2914*4bdc9457SAndroid Build Coastguard Worker             .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8,
2915*4bdc9457SAndroid Build Coastguard Worker             .channel_tile = 1,
2916*4bdc9457SAndroid Build Coastguard Worker             .pixel_tile = 8,
2917*4bdc9457SAndroid Build Coastguard Worker           };
2918*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_NO_NCHW_OPERATORS
2919*4bdc9457SAndroid Build Coastguard Worker       }
2920*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_ENABLE_ARM_FP16
2921*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F16_OPERATORS
2922*4bdc9457SAndroid Build Coastguard Worker 
2923*4bdc9457SAndroid Build Coastguard Worker   /**************************** F32 AArch64 micro-kernels ****************************/
2924*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F32_OPERATORS
2925*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_F32;
2926*4bdc9457SAndroid Build Coastguard Worker 
2927*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ENABLE_ASSEMBLY && !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
2928*4bdc9457SAndroid Build Coastguard Worker       switch (cpuinfo_get_core(0)->uarch) {
2929*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a72:
2930*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75);
2931*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75);
2932*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2933*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2934*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2935*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 4;
2936*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
2937*4bdc9457SAndroid Build Coastguard Worker           break;
2938*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a57:
2939*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a75:
2940*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a76:
2941*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_exynos_m3:
2942*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_exynos_m4:
2943*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75);
2944*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75);
2945*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_GEMM_M_SPECIALIZATION
2946*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75);
2947*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75);
2948*4bdc9457SAndroid Build Coastguard Worker           #endif
2949*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2950*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2951*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2952*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
2953*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
2954*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_JIT
2955*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.gemm = xnn_init_hmp_gemm_codegen(xnn_generate_f32_gemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75);
2956*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.igemm = xnn_init_hmp_igemm_codegen(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75);
2957*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.gemm1 = xnn_init_hmp_gemm_codegen(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2958*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.igemm1 = xnn_init_hmp_igemm_codegen(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
2959*4bdc9457SAndroid Build Coastguard Worker           #endif
2960*4bdc9457SAndroid Build Coastguard Worker           break;
2961*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_exynos_m1:
2962*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_exynos_m2:
2963*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma);
2964*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma);
2965*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma);
2966*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma);
2967*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2968*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
2969*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
2970*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.log2_sr = 2;
2971*4bdc9457SAndroid Build Coastguard Worker           break;
2972*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a53:
2973*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53);
2974*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53);
2975*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53);
2976*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53);
2977*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2978*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
2979*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
2980*4bdc9457SAndroid Build Coastguard Worker           break;
2981*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a55r0:
2982*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53);
2983*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53);
2984*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53);
2985*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53);
2986*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2987*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
2988*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
2989*4bdc9457SAndroid Build Coastguard Worker           break;
2990*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a35:
2991*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a55:
2992*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_kryo:
2993*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55);
2994*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55);
2995*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53);
2996*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53);
2997*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
2998*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
2999*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
3000*4bdc9457SAndroid Build Coastguard Worker           break;
3001*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a73:
3002*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73);
3003*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73);
3004*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
3005*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
3006*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
3007*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
3008*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
3009*4bdc9457SAndroid Build Coastguard Worker           break;
3010*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a77:
3011*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_exynos_m5:
3012*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75);
3013*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75);
3014*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75);
3015*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75);
3016*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
3017*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 4;
3018*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
3019*4bdc9457SAndroid Build Coastguard Worker           break;
3020*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_a78:
3021*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_cortex_x1:
3022*4bdc9457SAndroid Build Coastguard Worker         default:
3023*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128);
3024*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128);
3025*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64);
3026*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64);
3027*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
3028*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 6;
3029*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 8;
3030*4bdc9457SAndroid Build Coastguard Worker           #if XNN_ENABLE_JIT
3031*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.gemm = xnn_init_hmp_gemm_codegen(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128);
3032*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.gemm.generator.igemm = xnn_init_hmp_igemm_codegen(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128);
3033*4bdc9457SAndroid Build Coastguard Worker           #endif
3034*4bdc9457SAndroid Build Coastguard Worker           break;
3035*4bdc9457SAndroid Build Coastguard Worker       }
3036*4bdc9457SAndroid Build Coastguard Worker       #if XNN_MAX_UARCH_TYPES > 1
3037*4bdc9457SAndroid Build Coastguard Worker       {
3038*4bdc9457SAndroid Build Coastguard Worker         /* Choose micro-kernels for little cores according to micro-kernel specification for the big core */
3039*4bdc9457SAndroid Build Coastguard Worker         const uint32_t mr = xnn_params.f32.gemm.mr;
3040*4bdc9457SAndroid Build Coastguard Worker         const uint32_t nr = xnn_params.f32.gemm.nr;
3041*4bdc9457SAndroid Build Coastguard Worker         const uint32_t log2_sr = xnn_params.f32.gemm.log2_sr;
3042*4bdc9457SAndroid Build Coastguard Worker         for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
3043*4bdc9457SAndroid Build Coastguard Worker           const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
3044*4bdc9457SAndroid Build Coastguard Worker           if (uarch_info == NULL) {
3045*4bdc9457SAndroid Build Coastguard Worker             /* No more microarchitectures in the system */
3046*4bdc9457SAndroid Build Coastguard Worker             break;
3047*4bdc9457SAndroid Build Coastguard Worker           }
3048*4bdc9457SAndroid Build Coastguard Worker 
3049*4bdc9457SAndroid Build Coastguard Worker           switch (uarch_info->uarch) {
3050*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a53:
3051*4bdc9457SAndroid Build Coastguard Worker               if (mr == 6 && nr == 8 && log2_sr == 0) {
3052*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53;
3053*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53;
3054*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3055*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53;
3056*4bdc9457SAndroid Build Coastguard Worker               } else if (mr == 4 && nr == 8 && log2_sr == 0) {
3057*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53;
3058*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53;
3059*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3060*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53;
3061*4bdc9457SAndroid Build Coastguard Worker               }
3062*4bdc9457SAndroid Build Coastguard Worker               break;
3063*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55r0:
3064*4bdc9457SAndroid Build Coastguard Worker               if (mr == 6 && nr == 8 && log2_sr == 0) {
3065*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53;
3066*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53;
3067*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3068*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3069*4bdc9457SAndroid Build Coastguard Worker               } else if (mr == 4 && nr == 8 && log2_sr == 0) {
3070*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53;
3071*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53;
3072*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3073*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3074*4bdc9457SAndroid Build Coastguard Worker               }
3075*4bdc9457SAndroid Build Coastguard Worker               break;
3076*4bdc9457SAndroid Build Coastguard Worker             case cpuinfo_uarch_cortex_a55:
3077*4bdc9457SAndroid Build Coastguard Worker               if (mr == 6 && nr == 8 && log2_sr == 0) {
3078*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55;
3079*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55;
3080*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3081*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3082*4bdc9457SAndroid Build Coastguard Worker               } else if (mr == 4 && nr == 8 && log2_sr == 0) {
3083*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55;
3084*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55;
3085*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3086*4bdc9457SAndroid Build Coastguard Worker                 xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)].function[i] = (xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53;
3087*4bdc9457SAndroid Build Coastguard Worker               }
3088*4bdc9457SAndroid Build Coastguard Worker               break;
3089*4bdc9457SAndroid Build Coastguard Worker             default:
3090*4bdc9457SAndroid Build Coastguard Worker               break;
3091*4bdc9457SAndroid Build Coastguard Worker           }
3092*4bdc9457SAndroid Build Coastguard Worker         }
3093*4bdc9457SAndroid Build Coastguard Worker       }
3094*4bdc9457SAndroid Build Coastguard Worker       #endif  // XNN_MAX_UARCH_TYPES > 1
3095*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75);
3096*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75);
3097*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
3098*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.mr = 4;
3099*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.nr = 2;
3100*4bdc9457SAndroid Build Coastguard Worker 
3101*4bdc9457SAndroid Build Coastguard Worker     #else  // XNN_ENABLE_ASSEMBLY && !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
3102*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ENABLE_ASSEMBLY
3103*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75);
3104*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75);
3105*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
3106*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75);
3107*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
3108*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.mr = 6;
3109*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.nr = 8;
3110*4bdc9457SAndroid Build Coastguard Worker 
3111*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75);
3112*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75);
3113*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
3114*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.mr = 4;
3115*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.nr = 2;
3116*4bdc9457SAndroid Build Coastguard Worker       #else  // !XNN_ENABLE_ASSEMBLY
3117*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64);
3118*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(6)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64);
3119*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64);
3120*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64);
3121*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
3122*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.mr = 6;
3123*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.nr = 8;
3124*4bdc9457SAndroid Build Coastguard Worker 
3125*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64);
3126*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64);
3127*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
3128*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.mr = 4;
3129*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.nr = 2;
3130*4bdc9457SAndroid Build Coastguard Worker        #endif  // XNN_ENABLE_ASSEMBLY
3131*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_ENABLE_ASSEMBLY && !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
3132*4bdc9457SAndroid Build Coastguard Worker 
3133*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma;
3134*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
3135*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].channel_tile = 8;
3136*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].primary_tile = 3;
3137*4bdc9457SAndroid Build Coastguard Worker 
3138*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma;
3139*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
3140*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].channel_tile = 8;
3141*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].primary_tile = 4;
3142*4bdc9457SAndroid Build Coastguard Worker 
3143*4bdc9457SAndroid Build Coastguard Worker     #if XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
3144*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma;
3145*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
3146*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 8;
3147*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
3148*4bdc9457SAndroid Build Coastguard Worker     #else  // !XNN_PLATFORM_IOS && !XNN_PLATFORM_MAC
3149*4bdc9457SAndroid Build Coastguard Worker       switch (cpuinfo_get_core(0)->uarch) {
3150*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_kryo:
3151*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma;
3152*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
3153*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].channel_tile = 8;
3154*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].primary_tile = 9;
3155*4bdc9457SAndroid Build Coastguard Worker           break;
3156*4bdc9457SAndroid Build Coastguard Worker         #if XNN_ENABLE_ASSEMBLY
3157*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a53:
3158*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a55r0:
3159*4bdc9457SAndroid Build Coastguard Worker           case cpuinfo_uarch_cortex_a55:
3160*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55;
3161*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
3162*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.dwconv[2].channel_tile = 4;
3163*4bdc9457SAndroid Build Coastguard Worker             xnn_params.f32.dwconv[2].primary_tile = 9;
3164*4bdc9457SAndroid Build Coastguard Worker             break;
3165*4bdc9457SAndroid Build Coastguard Worker         #endif  // XNN_ENABLE_ASSEMBLY
3166*4bdc9457SAndroid Build Coastguard Worker         default:
3167*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma;
3168*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
3169*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].channel_tile = 8;
3170*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.dwconv[2].primary_tile = 9;
3171*4bdc9457SAndroid Build Coastguard Worker           break;
3172*4bdc9457SAndroid Build Coastguard Worker       }
3173*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
3174*4bdc9457SAndroid Build Coastguard Worker 
3175*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2;
3176*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
3177*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].channel_tile = 8;
3178*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].primary_tile = 25;
3179*4bdc9457SAndroid Build Coastguard Worker 
3180*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.avgpool = (struct avgpool_parameters) {
3181*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__neon_c4,
3182*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4,
3183*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
3184*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
3185*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
3186*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
3187*4bdc9457SAndroid Build Coastguard Worker     };
3188*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
3189*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__neon_c4,
3190*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4,
3191*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
3192*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
3193*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
3194*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
3195*4bdc9457SAndroid Build Coastguard Worker     };
3196*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
3197*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__neon_c4,
3198*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4,
3199*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
3200*4bdc9457SAndroid Build Coastguard Worker       .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
3201*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
3202*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
3203*4bdc9457SAndroid Build Coastguard Worker     };
3204*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.maxpool = (struct maxpool_parameters) {
3205*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4,
3206*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
3207*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
3208*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
3209*4bdc9457SAndroid Build Coastguard Worker     };
3210*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
3211*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__neon_c4,
3212*4bdc9457SAndroid Build Coastguard Worker       .mr = 4,
3213*4bdc9457SAndroid Build Coastguard Worker     };
3214*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
3215*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__neon_c4,
3216*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
3217*4bdc9457SAndroid Build Coastguard Worker     };
3218*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
3219*4bdc9457SAndroid Build Coastguard Worker       .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__neon_c4,
3220*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
3221*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
3222*4bdc9457SAndroid Build Coastguard Worker     };
3223*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
3224*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neonfma_c8,
3225*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
3226*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
3227*4bdc9457SAndroid Build Coastguard Worker     };
3228*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.abs = (struct vunary_parameters) {
3229*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__neon_x8,
3230*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3231*4bdc9457SAndroid Build Coastguard Worker     };
3232*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.clamp = (struct vunary_parameters) {
3233*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__neon_x8,
3234*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
3235*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3236*4bdc9457SAndroid Build Coastguard Worker     };
3237*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.elu = (struct vunary_parameters) {
3238*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
3239*4bdc9457SAndroid Build Coastguard Worker       .init.f32_elu = xnn_init_f32_elu_neonfma_rr1_lut16_p3_params,
3240*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3241*4bdc9457SAndroid Build Coastguard Worker     };
3242*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.hswish = (struct vunary_parameters) {
3243*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__neon_x16,
3244*4bdc9457SAndroid Build Coastguard Worker       .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
3245*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3246*4bdc9457SAndroid Build Coastguard Worker     };
3247*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.lrelu = (struct vunary_parameters) {
3248*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__neon_x8,
3249*4bdc9457SAndroid Build Coastguard Worker       .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,
3250*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3251*4bdc9457SAndroid Build Coastguard Worker     };
3252*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.neg = (struct vunary_parameters) {
3253*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__neon_x8,
3254*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3255*4bdc9457SAndroid Build Coastguard Worker     };
3256*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndne = (struct vunary_parameters) {
3257*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__neonv8_x8,
3258*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3259*4bdc9457SAndroid Build Coastguard Worker     };
3260*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndz = (struct vunary_parameters) {
3261*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__neonv8_x8,
3262*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3263*4bdc9457SAndroid Build Coastguard Worker     };
3264*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndu = (struct vunary_parameters) {
3265*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__neonv8_x8,
3266*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3267*4bdc9457SAndroid Build Coastguard Worker     };
3268*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndd = (struct vunary_parameters) {
3269*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__neonv8_x8,
3270*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3271*4bdc9457SAndroid Build Coastguard Worker     };
3272*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sigmoid = (struct vunary_parameters) {
3273*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
3274*4bdc9457SAndroid Build Coastguard Worker       .init.f32_sigmoid = xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params,
3275*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3276*4bdc9457SAndroid Build Coastguard Worker     };
3277*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqr = (struct vunary_parameters) {
3278*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__neon_x8,
3279*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3280*4bdc9457SAndroid Build Coastguard Worker     };
3281*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqrt = (struct vunary_parameters) {
3282*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__neon_sqrt_x4,
3283*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
3284*4bdc9457SAndroid Build Coastguard Worker     };
3285*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.prelu = (struct prelu_parameters) {
3286*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
3287*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 2,
3288*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
3289*4bdc9457SAndroid Build Coastguard Worker     };
3290*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
3291*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x16,
3292*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params,
3293*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3294*4bdc9457SAndroid Build Coastguard Worker     };
3295*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__neon;
3296*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vadd = (struct vbinary_parameters) {
3297*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__neon_x8,
3298*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__neon_x8,
3299*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__neon_x8,
3300*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
3301*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3302*4bdc9457SAndroid Build Coastguard Worker     };
3303*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vdiv = (struct vbinary_parameters) {
3304*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__neon_x8,
3305*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__neon_x8,
3306*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__neon_x8,
3307*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
3308*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3309*4bdc9457SAndroid Build Coastguard Worker     };
3310*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmax = (struct vbinary_parameters) {
3311*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
3312*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
3313*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
3314*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3315*4bdc9457SAndroid Build Coastguard Worker     };
3316*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmin = (struct vbinary_parameters) {
3317*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__neon_x8,
3318*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
3319*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__neon_x8,
3320*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3321*4bdc9457SAndroid Build Coastguard Worker     };
3322*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmul = (struct vbinary_parameters) {
3323*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__neon_x8,
3324*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__neon_x8,
3325*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__neon_x8,
3326*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
3327*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3328*4bdc9457SAndroid Build Coastguard Worker     };
3329*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsub = (struct vbinary_parameters) {
3330*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__neon_x8,
3331*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__neon_x8,
3332*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__neon_x8,
3333*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
3334*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3335*4bdc9457SAndroid Build Coastguard Worker     };
3336*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
3337*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__neon_x8,
3338*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__neon_x8,
3339*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__neon_x8,
3340*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
3341*4bdc9457SAndroid Build Coastguard Worker     };
3342*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
3343*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x,
3344*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
3345*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
3346*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 2,
3347*4bdc9457SAndroid Build Coastguard Worker     };
3348*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_NCHW_OPERATORS
3349*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_CHW_OPT;
3350*4bdc9457SAndroid Build Coastguard Worker 
3351*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm = (struct spmm_parameters) {
3352*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined,
3353*4bdc9457SAndroid Build Coastguard Worker         .mr = 32,
3354*4bdc9457SAndroid Build Coastguard Worker         .nr = 1,
3355*4bdc9457SAndroid Build Coastguard Worker       };
3356*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm2 = (struct spmm_parameters) {
3357*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x2__neonfma,
3358*4bdc9457SAndroid Build Coastguard Worker         .mr = 32,
3359*4bdc9457SAndroid Build Coastguard Worker         .nr = 2,
3360*4bdc9457SAndroid Build Coastguard Worker       };
3361*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm4 = (struct spmm_parameters) {
3362*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x4__neonfma,
3363*4bdc9457SAndroid Build Coastguard Worker         .mr = 32,
3364*4bdc9457SAndroid Build Coastguard Worker         .nr = 4,
3365*4bdc9457SAndroid Build Coastguard Worker       };
3366*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
3367*4bdc9457SAndroid Build Coastguard Worker         .ukernel_with_symm_padding =
3368*4bdc9457SAndroid Build Coastguard Worker           (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2,
3369*4bdc9457SAndroid Build Coastguard Worker         .output_channel_tile = 4,
3370*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
3371*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 2,
3372*4bdc9457SAndroid Build Coastguard Worker       };
3373*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
3374*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4,
3375*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
3376*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 3,
3377*4bdc9457SAndroid Build Coastguard Worker       };
3378*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
3379*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2,
3380*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
3381*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
3382*4bdc9457SAndroid Build Coastguard Worker       };
3383*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
3384*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4,
3385*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
3386*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 4,
3387*4bdc9457SAndroid Build Coastguard Worker       };
3388*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
3389*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_1x4_acc2,
3390*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
3391*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
3392*4bdc9457SAndroid Build Coastguard Worker       };
3393*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
3394*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__neon_x4,
3395*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
3396*4bdc9457SAndroid Build Coastguard Worker       };
3397*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
3398*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__neonfma_p8,
3399*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
3400*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 8,
3401*4bdc9457SAndroid Build Coastguard Worker       };
3402*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_NCHW_OPERATORS
3403*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F32_OPERATORS
3404*4bdc9457SAndroid Build Coastguard Worker 
3405*4bdc9457SAndroid Build Coastguard Worker   /*************************** VCVT AArch64 micro-kernels ***************************/
3406*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_VCVT_OPERATORS
3407*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_VCVT;
3408*4bdc9457SAndroid Build Coastguard Worker 
3409*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
3410*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__neonfp16_x16,
3411*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3412*4bdc9457SAndroid Build Coastguard Worker     };
3413*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
3414*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__neonfp16_x16,
3415*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
3416*4bdc9457SAndroid Build Coastguard Worker     };
3417*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
3418*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__neonv8_x32,
3419*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neonv8_params,
3420*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
3421*4bdc9457SAndroid Build Coastguard Worker     };
3422*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {  // each entry here is a whole-struct assignment: members not named are zero-initialized
3423*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__neonv8_x32,
3424*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neonv8_params,
3425*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,  // same value as the _x32 suffix of the ukernel name
3426*4bdc9457SAndroid Build Coastguard Worker     };
3427*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8 = (struct vunary_parameters) {  // from here on the plain neon ukernels/initializers are used, not the neonv8 ones
3428*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__neon_x32,
3429*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_cvt = xnn_init_qs8_cvt_neon_params,
3430*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
3431*4bdc9457SAndroid Build Coastguard Worker     };
3432*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
3433*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__neon_x32,
3434*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_neon_params,
3435*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
3436*4bdc9457SAndroid Build Coastguard Worker     };
3437*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8 = (struct vunary_parameters) {
3438*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__neon_x32,
3439*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_cvt = xnn_init_qu8_cvt_neon_params,
3440*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
3441*4bdc9457SAndroid Build Coastguard Worker     };
3442*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
3443*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__neon_x32,
3444*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_neon_params,
3445*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
3446*4bdc9457SAndroid Build Coastguard Worker     };
3447*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_VCVT_OPERATORS
3448*4bdc9457SAndroid Build Coastguard Worker 
3449*4bdc9457SAndroid Build Coastguard Worker   /**************************** X32 AArch64 micro-kernels ****************************/
3450*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X32_OPERATORS  // everything up to the matching #endif is compiled out when XNN_NO_X32_OPERATORS is defined
3451*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X32;  // set unconditionally inside this section
3452*4bdc9457SAndroid Build Coastguard Worker 
3453*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__neon;
3454*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.zip = (struct zip_parameters) {  // x2/x3/x4 are cast to the zipc type, xm to the zipv type
3455*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
3456*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
3457*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
3458*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
3459*4bdc9457SAndroid Build Coastguard Worker     };
3460*4bdc9457SAndroid Build Coastguard Worker 
3461*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.transpose = (struct transpose_parameters) {  // only const_size_ukernel is named; variable_size_ukernel is zero-initialized by the compound literal
3462*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl,
3463*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
3464*4bdc9457SAndroid Build Coastguard Worker     };
3465*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X32_OPERATORS
3466*4bdc9457SAndroid Build Coastguard Worker 
3467*4bdc9457SAndroid Build Coastguard Worker   /**************************** XX AArch64 micro-kernels ****************************/
3468*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_XX_OPERATORS  // everything up to the matching #endif is compiled out when XNN_NO_XX_OPERATORS is defined
3469*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_XX;  // set unconditionally inside this section
3470*4bdc9457SAndroid Build Coastguard Worker 
3471*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;  // copy is a bare function pointer; fill/pad/transpose below are parameter structs
3472*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.fill = (struct fill_parameters) {
3473*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__neon_x64,
3474*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
3475*4bdc9457SAndroid Build Coastguard Worker     };
3476*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.pad = (struct pad_parameters) {
3477*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__neon,
3478*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
3479*4bdc9457SAndroid Build Coastguard Worker     };
3480*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.transpose = (struct transpose_parameters) {  // only variable_size_ukernel is named; const_size_ukernel is zero-initialized by the compound literal
3481*4bdc9457SAndroid Build Coastguard Worker       .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,  // assigned without a cast, unlike the other ukernels in this section
3482*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
3483*4bdc9457SAndroid Build Coastguard Worker     };
3484*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_XX_OPERATORS
3485*4bdc9457SAndroid Build Coastguard Worker 
3486*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
3487*4bdc9457SAndroid Build Coastguard Worker   if (!cpuinfo_has_x86_sse2()) {  // SSE2 is the minimum on x86: the last-resort branches below select SSE2 ukernels
3488*4bdc9457SAndroid Build Coastguard Worker     xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
3489*4bdc9457SAndroid Build Coastguard Worker     return;  // early exit: none of the x86 assignments below are performed
3490*4bdc9457SAndroid Build Coastguard Worker   }
3491*4bdc9457SAndroid Build Coastguard Worker 
3492*4bdc9457SAndroid Build Coastguard Worker   /**************************** QC8 x86 micro-kernels ****************************/
3493*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QC8_OPERATORS  // everything up to the matching #endif is compiled out when XNN_NO_QC8_OPERATORS is defined
3494*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QC8;  // set before, and independently of, the ISA selection below
3495*4bdc9457SAndroid Build Coastguard Worker 
3496*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // GEMM/IGEMM selection, checked in order: AVX512 (F+BW+DQ+VL, never when XNN_PLATFORM_MOBILE is nonzero), XOP, AVX2, AVX, SSE4.1, SSE2
3497*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3498*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3499*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3500*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3501*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx512_params;
3502*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 4;  // in every branch mr/nr/log2_kr mirror the MRxNRcKR tile in the ukernel names (here 4x16c8, log2(8) == 3)
3503*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 16;
3504*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3505*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3506*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3507*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3508*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3509*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3510*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3511*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;  // XOP ukernels are paired with the SSE4 params initializer
3512*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 2;
3513*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 4;
3514*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3515*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3516*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2);
3517*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2);
3518*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2);
3519*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2);
3520*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx2_params;
3521*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 3;
3522*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 8;
3523*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3524*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3525*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3526*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3527*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3528*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3529*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;  // AVX ukernels are likewise paired with the SSE4 params initializer
3530*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 2;
3531*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 4;
3532*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3533*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3534*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3535*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3536*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3537*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3538*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3539*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 3;
3540*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 4;
3541*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3542*4bdc9457SAndroid Build Coastguard Worker     } else {  // last resort: SSE2 ukernels; a missing SSE2 already caused the early return above
3543*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3544*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3545*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3546*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3547*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse2_params;
3548*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 3;
3549*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 4;
3550*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.log2_kr = 3;
3551*4bdc9457SAndroid Build Coastguard Worker     }
3552*4bdc9457SAndroid Build Coastguard Worker 
3553*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // depthwise-conv selection: same ISA order as the GEMM chain above; fills dwconv[0..2] per branch
3554*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up32x3__avx512skx_mul32;
3555*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx512_params;
3556*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 32;  // in every branch channel_tile equals the N in the upNxK part of the ukernel name
3557*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32;
3558*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx512_params;
3559*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 32;
3560*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32;
3561*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx512_params;
3562*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 32;
3563*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3564*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3565*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16;
3566*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3567*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 16;
3568*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16;
3569*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3570*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 16;
3571*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16;
3572*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3573*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 16;
3574*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3575*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx2_mul32;
3576*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx2_params;
3577*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 16;
3578*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32;
3579*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx2_params;
3580*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 16;
3581*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32;
3582*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_avx2_params;
3583*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 16;
3584*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3585*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16;
3586*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3587*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 16;
3588*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16;
3589*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3590*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 16;
3591*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16;
3592*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3593*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 16;
3594*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3595*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16;
3596*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3597*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 8;
3598*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
3599*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3600*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 8;
3601*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16;
3602*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse4_params;
3603*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 8;
3604*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse2()) {  // no bare else here, unlike the GEMM chain; expected to hold because a missing SSE2 already caused the early return above
3605*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16;
3606*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse2_params;
3607*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 8;
3608*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
3609*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse2_params;
3610*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 8;
3611*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16;
3612*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_sse2_params;
3613*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 8;
3614*4bdc9457SAndroid Build Coastguard Worker     }
3615*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].primary_tile = 3;  // primary_tile is ISA-independent: 3/9/25 match the x3/x9/x25 in the unipass ukernel names chosen for slots 0/1/2
3616*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].primary_tile = 9;
3617*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].primary_tile = 25;
3618*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QC8_OPERATORS
3619*4bdc9457SAndroid Build Coastguard Worker 
3620*4bdc9457SAndroid Build Coastguard Worker   /**************************** QS8 x86 micro-kernels ****************************/
3621*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
3622*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QS8;
3623*4bdc9457SAndroid Build Coastguard Worker 
3624*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // QS8 GEMM/IGEMM selection, checked in order: AVX512 (F+BW+DQ+VL, never when XNN_PLATFORM_MOBILE is nonzero), XOP, AVX2, AVX, SSE4.1, SSE2
3625*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3626*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3627*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3628*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3629*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx512_params;
3630*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 4;  // in every branch mr/nr/log2_kr mirror the MRxNRcKR tile in the ukernel names (here 4x16c8, log2(8) == 3)
3631*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 16;
3632*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3633*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3634*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3635*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3636*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3637*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3638*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3639*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;  // XOP ukernels are paired with the SSE4 params initializer
3640*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 2;
3641*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 4;
3642*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3643*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3644*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2);
3645*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2);
3646*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2);
3647*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2);
3648*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
3649*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 3;
3650*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 8;
3651*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3652*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3653*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3654*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3655*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3656*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3657*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;  // AVX ukernels are likewise paired with the SSE4 params initializer
3658*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 2;
3659*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 4;
3660*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3661*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3662*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3663*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3664*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3665*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3666*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3667*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 3;
3668*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 4;
3669*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3670*4bdc9457SAndroid Build Coastguard Worker     } else {  // last resort: SSE2 ukernels; a missing SSE2 already caused the early return above
3671*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3672*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3673*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3674*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3675*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse2_params;
3676*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 3;
3677*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 4;
3678*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.log2_kr = 3;
3679*4bdc9457SAndroid Build Coastguard Worker     }
3680*4bdc9457SAndroid Build Coastguard Worker 
3681*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
3682*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32;
3683*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx512_params;
3684*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 32;
3685*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32;
3686*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx512_params;
3687*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 32;
3688*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3689*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3690*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16;
3691*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3692*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 16;
3693*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul16_add16;
3694*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3695*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 16;
3696*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3697*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32;
3698*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
3699*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 16;
3700*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32;
3701*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
3702*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 16;
3703*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3704*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16;
3705*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3706*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 16;
3707*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16_add16;
3708*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3709*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 16;
3710*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3711*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16;
3712*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3713*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 8;
3714*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16_add16;
3715*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse4_params;
3716*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 8;
3717*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse2()) {
3718*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16;
3719*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse2_params;
3720*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 8;
3721*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16_add16;
3722*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_sse2_params;
3723*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 8;
3724*4bdc9457SAndroid Build Coastguard Worker     }
3725*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].primary_tile = 9;
3726*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].primary_tile = 25;
3727*4bdc9457SAndroid Build Coastguard Worker 
3728*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_sse4_1()) {
3729*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
3730*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8,
3731*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse41_c8,
3732*4bdc9457SAndroid Build Coastguard Worker         .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_sse4_params,
3733*4bdc9457SAndroid Build Coastguard Worker         .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_sse4_params,
3734*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
3735*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
3736*4bdc9457SAndroid Build Coastguard Worker       };
3737*4bdc9457SAndroid Build Coastguard Worker     } else {
3738*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
3739*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8,
3740*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8,
3741*4bdc9457SAndroid Build Coastguard Worker         .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_sse2_params,
3742*4bdc9457SAndroid Build Coastguard Worker         .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_sse2_params,
3743*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
3744*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
3745*4bdc9457SAndroid Build Coastguard Worker       };
3746*4bdc9457SAndroid Build Coastguard Worker     }
3747*4bdc9457SAndroid Build Coastguard Worker 
3748*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
3749*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3750*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
3751*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
3752*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
3753*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_avx512_params,
3754*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
3755*4bdc9457SAndroid Build Coastguard Worker       };
3756*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3757*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3758*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
3759*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8,
3760*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8,
3761*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul32_params,
3762*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
3763*4bdc9457SAndroid Build Coastguard Worker       };
3764*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3765*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3766*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
3767*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
3768*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
3769*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_avx2_params,
3770*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
3771*4bdc9457SAndroid Build Coastguard Worker       };
3772*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3773*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3774*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
3775*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
3776*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
3777*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul32_params,
3778*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
3779*4bdc9457SAndroid Build Coastguard Worker       };
3780*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3781*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3782*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
3783*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
3784*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
3785*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul16_params,
3786*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
3787*4bdc9457SAndroid Build Coastguard Worker       };
3788*4bdc9457SAndroid Build Coastguard Worker     } else {
3789*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vadd = (struct vbinary_parameters) {
3790*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
3791*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
3792*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
3793*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_add = xnn_init_qs8_add_minmax_sse2_params,
3794*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
3795*4bdc9457SAndroid Build Coastguard Worker       };
3796*4bdc9457SAndroid Build Coastguard Worker     }
3797*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx()) {
3798*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vmul = (struct vbinary_parameters) {
3799*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
3800*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
3801*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
3802*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse4_params,
3803*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
3804*4bdc9457SAndroid Build Coastguard Worker       };
3805*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3806*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vmul = (struct vbinary_parameters) {
3807*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
3808*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
3809*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
3810*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse4_params,
3811*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
3812*4bdc9457SAndroid Build Coastguard Worker       };
3813*4bdc9457SAndroid Build Coastguard Worker     } else {
3814*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.vmul = (struct vbinary_parameters) {
3815*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
3816*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
3817*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
3818*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse2_params,
3819*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
3820*4bdc9457SAndroid Build Coastguard Worker       };
3821*4bdc9457SAndroid Build Coastguard Worker     }
3822*4bdc9457SAndroid Build Coastguard Worker 
3823*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx2()) {  // QS8 Leaky ReLU kernel selection: first matching ISA check wins (AVX2, AVX, SSE4.1, SSSE3, then SSE2 fallback)
3824*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
3825*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__avx2_x32,
3826*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_avx2_params,
3827*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
3828*4bdc9457SAndroid Build Coastguard Worker       };
3829*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3830*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
3831*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__avx_x32,
3832*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_avx_params,
3833*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
3834*4bdc9457SAndroid Build Coastguard Worker       };
3835*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3836*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
3837*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__sse41_x32,
3838*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params,
3839*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
3840*4bdc9457SAndroid Build Coastguard Worker       };
3841*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_ssse3()) {  // must test SSSE3 here: repeating the SSE4.1 check made this branch unreachable
3842*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
3843*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__ssse3_x32,
3844*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params,
3845*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
3846*4bdc9457SAndroid Build Coastguard Worker       };
3847*4bdc9457SAndroid Build Coastguard Worker     } else {  // no newer ISA detected: fall back to the SSE2 kernel
3848*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
3849*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__sse2_x32,
3850*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params,
3851*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
3852*4bdc9457SAndroid Build Coastguard Worker       };
3853*4bdc9457SAndroid Build Coastguard Worker     }
3854*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS
3855*4bdc9457SAndroid Build Coastguard Worker 
3856*4bdc9457SAndroid Build Coastguard Worker   /**************************** QU8 x86 micro-kernels ****************************/
3857*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QU8_OPERATORS
3858*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QU8;
3859*4bdc9457SAndroid Build Coastguard Worker 
3860*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
3861*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3862*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx);
3863*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3864*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx);
3865*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params;
3866*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 4;
3867*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 16;
3868*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3869*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3870*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3871*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3872*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64);
3873*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3874*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64);
3875*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3876*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 2;
3877*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 4;
3878*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3879*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3880*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2);
3881*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2);
3882*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x8c8__avx2);
3883*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2);
3884*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx2_params;
3885*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 3;
3886*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 8;
3887*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3888*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3889*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3890*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128);
3891*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3892*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128);
3893*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3894*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 2;
3895*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 4;
3896*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3897*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3898*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3899*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64);
3900*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3901*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64);
3902*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3903*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 3;
3904*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 4;
3905*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3906*4bdc9457SAndroid Build Coastguard Worker     } else {
3907*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3908*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64);
3909*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3910*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64);
3911*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3912*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 3;
3913*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 4;
3914*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.log2_kr = 3;
3915*4bdc9457SAndroid Build Coastguard Worker     }
3916*4bdc9457SAndroid Build Coastguard Worker 
3917*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
3918*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32;
3919*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params;
3920*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 32;
3921*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32;
3922*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params;
3923*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 32;
3924*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
3925*4bdc9457SAndroid Build Coastguard Worker       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
3926*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32;
3927*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3928*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 16;
3929*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32;
3930*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3931*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 16;
3932*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
3933*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32;
3934*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx2_params;
3935*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 16;
3936*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32;
3937*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx2_params;
3938*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 16;
3939*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
3940*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16;
3941*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3942*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 16;
3943*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16;
3944*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3945*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 16;
3946*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
3947*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16;
3948*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3949*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 8;
3950*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16;
3951*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3952*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 8;
3953*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse2()) {
3954*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16;
3955*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3956*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 8;
3957*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16;
3958*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_sse2_params;
3959*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 8;
3960*4bdc9457SAndroid Build Coastguard Worker     }
3961*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].primary_tile = 9;
3962*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].primary_tile = 25;
3963*4bdc9457SAndroid Build Coastguard Worker 
3964*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.avgpool = (struct avgpool_parameters) {
3965*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8,
3966*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8,
3967*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_sse2_params,
3968*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
3969*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
3970*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
3971*4bdc9457SAndroid Build Coastguard Worker     };
3972*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_sse4_1()) {
3973*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
3974*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8,
3975*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse41_c8,
3976*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_sse4_params,
3977*4bdc9457SAndroid Build Coastguard Worker         .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_sse4_params,
3978*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
3979*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
3980*4bdc9457SAndroid Build Coastguard Worker       };
3981*4bdc9457SAndroid Build Coastguard Worker     } else {
3982*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
3983*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8,
3984*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c8,
3985*4bdc9457SAndroid Build Coastguard Worker         .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_sse2_params,
3986*4bdc9457SAndroid Build Coastguard Worker         .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_sse2_params,
3987*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
3988*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
3989*4bdc9457SAndroid Build Coastguard Worker       };
3990*4bdc9457SAndroid Build Coastguard Worker     }
3991*4bdc9457SAndroid Build Coastguard Worker 
3992*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
3993*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
3994*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
3995*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
3996*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
3997*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_avx512_params,
3998*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
3999*4bdc9457SAndroid Build Coastguard Worker       };
4000*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_xop()) {
4001*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
4002*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
4003*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8,
4004*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8,
4005*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_sse4_params,
4006*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4007*4bdc9457SAndroid Build Coastguard Worker       };
4008*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
4009*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
4010*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
4011*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
4012*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
4013*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_avx2_params,
4014*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4015*4bdc9457SAndroid Build Coastguard Worker       };
4016*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4017*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
4018*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
4019*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
4020*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
4021*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_sse4_params,
4022*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4023*4bdc9457SAndroid Build Coastguard Worker       };
4024*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
4025*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
4026*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
4027*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
4028*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
4029*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_sse2_params,
4030*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4031*4bdc9457SAndroid Build Coastguard Worker       };
4032*4bdc9457SAndroid Build Coastguard Worker     } else {
4033*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vadd = (struct vbinary_parameters) {
4034*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
4035*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
4036*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
4037*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_add = xnn_init_qu8_add_minmax_sse2_params,
4038*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4039*4bdc9457SAndroid Build Coastguard Worker       };
4040*4bdc9457SAndroid Build Coastguard Worker     }
4041*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx()) {
4042*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vmul = (struct vbinary_parameters) {
4043*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
4044*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
4045*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
4046*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params,
4047*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4048*4bdc9457SAndroid Build Coastguard Worker       };
4049*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
4050*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vmul = (struct vbinary_parameters) {
4051*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
4052*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
4053*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
4054*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params,
4055*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4056*4bdc9457SAndroid Build Coastguard Worker       };
4057*4bdc9457SAndroid Build Coastguard Worker     } else {
4058*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.vmul = (struct vbinary_parameters) {
4059*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
4060*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
4061*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
4062*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params,
4063*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4064*4bdc9457SAndroid Build Coastguard Worker       };
4065*4bdc9457SAndroid Build Coastguard Worker     }
4066*4bdc9457SAndroid Build Coastguard Worker 
4067*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx2()) {  // QU8 leaky ReLU: feature checks run from AVX2 down to the SSE2 fallback
4068*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
4069*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__avx2_x32,
4070*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_avx2_params,
4071*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4072*4bdc9457SAndroid Build Coastguard Worker       };
4073*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4074*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
4075*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__avx_x32,
4076*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_avx_params,
4077*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4078*4bdc9457SAndroid Build Coastguard Worker       };
4079*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
4080*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
4081*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__sse41_x32,
4082*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params,
4083*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4084*4bdc9457SAndroid Build Coastguard Worker       };
4085*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_ssse3()) {  // must test SSSE3 here: repeating the SSE4.1 check made this branch unreachable
4086*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
4087*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__ssse3_x32,
4088*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params,
4089*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4090*4bdc9457SAndroid Build Coastguard Worker       };
4091*4bdc9457SAndroid Build Coastguard Worker     } else {
4092*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
4093*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__sse2_x32,
4094*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params,
4095*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4096*4bdc9457SAndroid Build Coastguard Worker       };
4097*4bdc9457SAndroid Build Coastguard Worker     }
4098*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QU8_OPERATORS
4099*4bdc9457SAndroid Build Coastguard Worker 
4100*4bdc9457SAndroid Build Coastguard Worker   /**************************** S8 x86 micro-kernels ****************************/
4101*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_S8_OPERATORS
4102*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_S8;
4103*4bdc9457SAndroid Build Coastguard Worker 
4104*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_sse4_1()) {
4105*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.clamp = (struct vunary_parameters) {
4106*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__sse41_x64,
4107*4bdc9457SAndroid Build Coastguard Worker         .init.s8_minmax = xnn_init_s8_minmax_sse4_params,
4108*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
4109*4bdc9457SAndroid Build Coastguard Worker       };
4110*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
4111*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__sse41_c16,
4112*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
4113*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 16,
4114*4bdc9457SAndroid Build Coastguard Worker       };
4115*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.maxpool = (struct maxpool_parameters) {
4116*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__sse41_c16,
4117*4bdc9457SAndroid Build Coastguard Worker         .init.s8 = xnn_init_s8_minmax_sse4_params,
4118*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
4119*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
4120*4bdc9457SAndroid Build Coastguard Worker       };
4121*4bdc9457SAndroid Build Coastguard Worker     } else {
4122*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.clamp = (struct vunary_parameters) {
4123*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__sse2_x64,
4124*4bdc9457SAndroid Build Coastguard Worker         .init.s8_minmax = xnn_init_s8_minmax_sse2_params,
4125*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
4126*4bdc9457SAndroid Build Coastguard Worker       };
4127*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
4128*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__sse2_c8,
4129*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
4130*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4131*4bdc9457SAndroid Build Coastguard Worker       };
4132*4bdc9457SAndroid Build Coastguard Worker       xnn_params.s8.maxpool = (struct maxpool_parameters) {
4133*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__sse2_c16,
4134*4bdc9457SAndroid Build Coastguard Worker         .init.s8 = xnn_init_s8_minmax_sse2_params,
4135*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
4136*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
4137*4bdc9457SAndroid Build Coastguard Worker       };
4138*4bdc9457SAndroid Build Coastguard Worker     }
4139*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_S8_OPERATORS
4140*4bdc9457SAndroid Build Coastguard Worker 
4141*4bdc9457SAndroid Build Coastguard Worker   /**************************** U8 x86 micro-kernels ****************************/
4142*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_U8_OPERATORS
4143*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_U8;
4144*4bdc9457SAndroid Build Coastguard Worker 
4145*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.clamp = (struct vunary_parameters) {
4146*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__sse2_x64,
4147*4bdc9457SAndroid Build Coastguard Worker       .init.u8_minmax = xnn_init_u8_minmax_sse2_params,
4148*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 64,
4149*4bdc9457SAndroid Build Coastguard Worker     };
4150*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_sse4_1()) {
4151*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
4152*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__sse41_c16,
4153*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
4154*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 16,
4155*4bdc9457SAndroid Build Coastguard Worker       };
4156*4bdc9457SAndroid Build Coastguard Worker     } else {
4157*4bdc9457SAndroid Build Coastguard Worker       xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
4158*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__sse2_c8,
4159*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
4160*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4161*4bdc9457SAndroid Build Coastguard Worker       };
4162*4bdc9457SAndroid Build Coastguard Worker     }
4163*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.maxpool = (struct maxpool_parameters) {
4164*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16,
4165*4bdc9457SAndroid Build Coastguard Worker       .init.u8 = xnn_init_u8_minmax_sse2_params,
4166*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
4167*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
4168*4bdc9457SAndroid Build Coastguard Worker     };
4169*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
4170*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
4171*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_U8_OPERATORS
4172*4bdc9457SAndroid Build Coastguard Worker 
4173*4bdc9457SAndroid Build Coastguard Worker   /**************************** X8 x86 micro-kernels ****************************/
4174*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X8_OPERATORS
4175*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X8;
4176*4bdc9457SAndroid Build Coastguard Worker 
4177*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
4178*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__avx512skx_vpshufb_x64;
4179*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
4180*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__avx2_x128;
4181*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4182*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__avx_x64;
4183*4bdc9457SAndroid Build Coastguard Worker     } else {
4184*4bdc9457SAndroid Build Coastguard Worker       // Note: SSSE3 version is usually slower than scalar
4185*4bdc9457SAndroid Build Coastguard Worker       xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;
4186*4bdc9457SAndroid Build Coastguard Worker     }
4187*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.zip = (struct zip_parameters) {
4188*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
4189*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
4190*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
4191*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
4192*4bdc9457SAndroid Build Coastguard Worker     };
4193*4bdc9457SAndroid Build Coastguard Worker 
4194*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.transpose = (struct transpose_parameters) {
4195*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2,
4196*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
4197*4bdc9457SAndroid Build Coastguard Worker     };
4198*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X8_OPERATORS
4199*4bdc9457SAndroid Build Coastguard Worker 
4200*4bdc9457SAndroid Build Coastguard Worker 
4201*4bdc9457SAndroid Build Coastguard Worker   /**************************** X16 x86 micro-kernels ****************************/
4202*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X16_OPERATORS
4203*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X16;
4204*4bdc9457SAndroid Build Coastguard Worker 
4205*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x16.transpose = (struct transpose_parameters) {
4206*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2,
4207*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
4208*4bdc9457SAndroid Build Coastguard Worker     };
4209*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X16_OPERATORS
4210*4bdc9457SAndroid Build Coastguard Worker 
4211*4bdc9457SAndroid Build Coastguard Worker   /**************************** F16 x86 micro-kernels ****************************/
4212*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F16_OPERATORS
4213*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx2()) {
4214*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_F16;
4215*4bdc9457SAndroid Build Coastguard Worker 
4216*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast);
4217*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast);
4218*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast);
4219*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast);
4220*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.init.f16 = xnn_init_f16_minmax_avx_params;
4221*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.mr = 4;
4222*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gemm.nr = 16;
4223*4bdc9457SAndroid Build Coastguard Worker 
4224*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x3__fma3;
4225*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[0].init.f16 = xnn_init_f16_minmax_avx_params;
4226*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[0].channel_tile = 16;
4227*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[0].primary_tile = 3;
4228*4bdc9457SAndroid Build Coastguard Worker 
4229*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x4__fma3;
4230*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[1].init.f16 = xnn_init_f16_minmax_avx_params;
4231*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[1].channel_tile = 16;
4232*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[1].primary_tile = 4;
4233*4bdc9457SAndroid Build Coastguard Worker 
4234*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up16x9__fma3;
4235*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[2].init.f16 = xnn_init_f16_minmax_avx_params;
4236*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[2].channel_tile = 16;
4237*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[2].primary_tile = 9;
4238*4bdc9457SAndroid Build Coastguard Worker 
4239*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2;
4240*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[3].init.f16 = xnn_init_f16_minmax_avx_params;
4241*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[3].channel_tile = 8;
4242*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.dwconv[3].primary_tile = 25;
4243*4bdc9457SAndroid Build Coastguard Worker 
4244*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.avgpool = (struct avgpool_parameters) {
4245*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f16_avgpool_minmax_ukernel_9x__f16c_c8,
4246*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8,
4247*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_scaleminmax_avx_params,
4248*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
4249*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
4250*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4251*4bdc9457SAndroid Build Coastguard Worker       };
4252*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.pavgpool = (struct pavgpool_parameters) {
4253*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8,
4254*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8,
4255*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_minmax_avx_params,
4256*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
4257*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
4258*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4259*4bdc9457SAndroid Build Coastguard Worker       };
4260*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.gavgpool = (struct gavgpool_parameters) {
4261*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c8,
4262*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c8,
4263*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_scaleminmax_avx_params,
4264*4bdc9457SAndroid Build Coastguard Worker         .update.f16 = xnn_update_f16_scaleminmax_avx_params,
4265*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
4266*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4267*4bdc9457SAndroid Build Coastguard Worker       };
4268*4bdc9457SAndroid Build Coastguard Worker 
4269*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.maxpool = (struct maxpool_parameters) {
4270*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
4271*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_minmax_avx_params,
4272*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
4273*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
4274*4bdc9457SAndroid Build Coastguard Worker       };
4275*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.ibilinear = (struct ibilinear_parameters) {
4276*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_ukernel_function) xnn_f16_ibilinear_ukernel__fma3_c8,
4277*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 1,
4278*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4279*4bdc9457SAndroid Build Coastguard Worker       };
4280*4bdc9457SAndroid Build Coastguard Worker 
4281*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.prelu = (struct prelu_parameters) {
4282*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f16_prelu_ukernel__f16c_2x16,
4283*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
4284*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 16,
4285*4bdc9457SAndroid Build Coastguard Worker       };
4286*4bdc9457SAndroid Build Coastguard Worker 
4287*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
4288*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x40,
4289*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_expminus_avx2_rr1_p2_params,
4290*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 40,
4291*4bdc9457SAndroid Build Coastguard Worker       };
4292*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.rmax = (xnn_rmax_ukernel_function) xnn_f16_rmax_ukernel__f16c;
4293*4bdc9457SAndroid Build Coastguard Worker 
4294*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vadd = (struct vbinary_parameters) {
4295*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vadd_minmax_ukernel__f16c_x16,
4296*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vaddc_minmax_ukernel__f16c_x16,
4297*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vaddc_minmax_ukernel__f16c_x16,
4298*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4299*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4300*4bdc9457SAndroid Build Coastguard Worker       };
4301*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vdiv = (struct vbinary_parameters) {
4302*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vdiv_minmax_ukernel__f16c_x8,
4303*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vdivc_minmax_ukernel__f16c_x8,
4304*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vrdivc_minmax_ukernel__f16c_x8,
4305*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4306*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4307*4bdc9457SAndroid Build Coastguard Worker       };
4308*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vmax = (struct vbinary_parameters) {
4309*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmax_ukernel__f16c_x16,
4310*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmaxc_ukernel__f16c_x16,
4311*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmaxc_ukernel__f16c_x16,
4312*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4313*4bdc9457SAndroid Build Coastguard Worker       };
4314*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vmin = (struct vbinary_parameters) {
4315*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmin_ukernel__f16c_x16,
4316*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vminc_ukernel__f16c_x16,
4317*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vminc_ukernel__f16c_x16,
4318*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4319*4bdc9457SAndroid Build Coastguard Worker       };
4320*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vmul = (struct vbinary_parameters) {
4321*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmul_minmax_ukernel__f16c_x16,
4322*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmulc_minmax_ukernel__f16c_x16,
4323*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vmulc_minmax_ukernel__f16c_x16,
4324*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4325*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4326*4bdc9457SAndroid Build Coastguard Worker       };
4327*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vsub = (struct vbinary_parameters) {
4328*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsub_minmax_ukernel__f16c_x16,
4329*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsubc_minmax_ukernel__f16c_x16,
4330*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vrsubc_minmax_ukernel__f16c_x16,
4331*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4332*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4333*4bdc9457SAndroid Build Coastguard Worker       };
4334*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vsqrdiff = (struct vbinary_parameters) {
4335*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiff_ukernel__f16c_x16,
4336*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiffc_ukernel__f16c_x16,
4337*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f16_vsqrdiffc_ukernel__f16c_x16,
4338*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4339*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4340*4bdc9457SAndroid Build Coastguard Worker       };
4341*4bdc9457SAndroid Build Coastguard Worker 
4342*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.vmulcaddc = (struct vmulcaddc_parameters) {
4343*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
4344*4bdc9457SAndroid Build Coastguard Worker         .init.f16 = xnn_init_f16_minmax_avx_params,
4345*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4346*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
4347*4bdc9457SAndroid Build Coastguard Worker       };
4348*4bdc9457SAndroid Build Coastguard Worker 
4349*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.abs = (struct vunary_parameters) {
4350*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vabs_ukernel__sse2_x16,
4351*4bdc9457SAndroid Build Coastguard Worker         .init.f16_abs = xnn_init_f16_abs_sse_params,
4352*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4353*4bdc9457SAndroid Build Coastguard Worker       };
4354*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.clamp = (struct vunary_parameters) {
4355*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vclamp_ukernel__f16c_x16,
4356*4bdc9457SAndroid Build Coastguard Worker         .init.f16_minmax = xnn_init_f16_minmax_avx_params,
4357*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4358*4bdc9457SAndroid Build Coastguard Worker       };
4359*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.elu = (struct vunary_parameters) {
4360*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_velu_ukernel__avx2_rr1_p3_x16,
4361*4bdc9457SAndroid Build Coastguard Worker         .init.f16_elu = xnn_init_f16_elu_avx2_rr1_p3_params,
4362*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4363*4bdc9457SAndroid Build Coastguard Worker       };
4364*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.hswish = (struct vunary_parameters) {
4365*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vhswish_ukernel__f16c_x16,
4366*4bdc9457SAndroid Build Coastguard Worker         .init.f16_hswish = xnn_init_f16_hswish_avx_params,
4367*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4368*4bdc9457SAndroid Build Coastguard Worker       };
4369*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.lrelu = (struct vunary_parameters) {
4370*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vlrelu_ukernel__f16c_x16,
4371*4bdc9457SAndroid Build Coastguard Worker         .init.f16_lrelu = xnn_init_f16_lrelu_avx_params,
4372*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4373*4bdc9457SAndroid Build Coastguard Worker       };
4374*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.neg = (struct vunary_parameters) {
4375*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vneg_ukernel__sse2_x16,
4376*4bdc9457SAndroid Build Coastguard Worker         .init.f16_neg = xnn_init_f16_neg_sse_params,
4377*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4378*4bdc9457SAndroid Build Coastguard Worker       };
4379*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.rndne = (struct vunary_parameters) {
4380*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndne_ukernel__f16c_x16,
4381*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4382*4bdc9457SAndroid Build Coastguard Worker       };
4383*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.rndz = (struct vunary_parameters) {
4384*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndz_ukernel__f16c_x16,
4385*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4386*4bdc9457SAndroid Build Coastguard Worker       };
4387*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.rndu = (struct vunary_parameters) {
4388*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndu_ukernel__f16c_x16,
4389*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4390*4bdc9457SAndroid Build Coastguard Worker       };
4391*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.rndd = (struct vunary_parameters) {
4392*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vrndd_ukernel__f16c_x16,
4393*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4394*4bdc9457SAndroid Build Coastguard Worker       };
4395*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.sigmoid = (struct vunary_parameters) {
4396*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32,
4397*4bdc9457SAndroid Build Coastguard Worker         .init.f16_sigmoid = xnn_init_f16_sigmoid_avx2_rr1_p2_params,
4398*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4399*4bdc9457SAndroid Build Coastguard Worker       };
4400*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.sqr = (struct vunary_parameters) {
4401*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsqr_ukernel__f16c_x16,
4402*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4403*4bdc9457SAndroid Build Coastguard Worker       };
4404*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f16.sqrt = (struct vunary_parameters) {
4405*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_vsqrt_ukernel__f16c_sqrt_x8,
4406*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4407*4bdc9457SAndroid Build Coastguard Worker       };
4408*4bdc9457SAndroid Build Coastguard Worker     }
4409*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F16_OPERATORS
4410*4bdc9457SAndroid Build Coastguard Worker 
4411*4bdc9457SAndroid Build Coastguard Worker   /**************************** F32 x86 micro-kernels ****************************/
4412*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F32_OPERATORS
4413*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_F32;
4414*4bdc9457SAndroid Build Coastguard Worker 
4415*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
4416*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast);
4417*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast);
4418*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast);
4419*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast);
4420*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
4421*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 7;
4422*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 16;
4423*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_fma3()) {
4424*4bdc9457SAndroid Build Coastguard Worker       switch (cpuinfo_get_core(0)->uarch) {
4425*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_zen:
4426*4bdc9457SAndroid Build Coastguard Worker         case cpuinfo_uarch_dhyana:
4427*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast);
4428*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast);
4429*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast);
4430*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast);
4431*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_avx_params;
4432*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 4;
4433*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 16;
4434*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.log2_sr = 2;
4435*4bdc9457SAndroid Build Coastguard Worker           break;
4436*4bdc9457SAndroid Build Coastguard Worker         default:
4437*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast);
4438*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast);
4439*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast);
4440*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast);
4441*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_avx_params;
4442*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.mr = 5;
4443*4bdc9457SAndroid Build Coastguard Worker           xnn_params.f32.gemm.nr = 16;
4444*4bdc9457SAndroid Build Coastguard Worker           break;
4445*4bdc9457SAndroid Build Coastguard Worker       }
4446*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4447*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast);
4448*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast);
4449*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast);
4450*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast);
4451*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_avx_params;
4452*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 5;
4453*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 16;
4454*4bdc9457SAndroid Build Coastguard Worker     } else {
4455*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__sse_load1);
4456*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__sse_load1);
4457*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__sse_load1);
4458*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__sse_load1);
4459*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_sse_params;
4460*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 4;
4461*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 8;
4462*4bdc9457SAndroid Build Coastguard Worker     }
4463*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2c4__sse);
4464*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2c4__sse);
4465*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_sse_params;
4466*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.mr = 4;
4467*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.nr = 2;
4468*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.log2_kr = 2;
4469*4bdc9457SAndroid Build Coastguard Worker 
4470*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
4471*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x3__avx512f;
4472*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
4473*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 16;
4474*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
4475*4bdc9457SAndroid Build Coastguard Worker 
4476*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f;
4477*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
4478*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 16;
4479*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
4480*4bdc9457SAndroid Build Coastguard Worker 
4481*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f;
4482*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
4483*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 16;
4484*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
4485*4bdc9457SAndroid Build Coastguard Worker 
4486*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f;
4487*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
4488*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 16;
4489*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
4490*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_fma3()) {
4491*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x3__fma3;
4492*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_avx_params;
4493*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 16;
4494*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
4495*4bdc9457SAndroid Build Coastguard Worker 
4496*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x4__fma3;
4497*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_avx_params;
4498*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 16;
4499*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
4500*4bdc9457SAndroid Build Coastguard Worker 
4501*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x9__fma3;
4502*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_avx_params;
4503*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 16;
4504*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
4505*4bdc9457SAndroid Build Coastguard Worker 
4506*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__fma3;
4507*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_avx_params;
4508*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 8;
4509*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
4510*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4511*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x3__avx;
4512*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_avx_params;
4513*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 16;
4514*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
4515*4bdc9457SAndroid Build Coastguard Worker 
4516*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x4__avx;
4517*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_avx_params;
4518*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 16;
4519*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
4520*4bdc9457SAndroid Build Coastguard Worker 
4521*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up16x9__avx;
4522*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_avx_params;
4523*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 16;
4524*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
4525*4bdc9457SAndroid Build Coastguard Worker 
4526*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__avx;
4527*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_avx_params;
4528*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 8;
4529*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
4530*4bdc9457SAndroid Build Coastguard Worker     } else {
4531*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x3__sse;
4532*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_sse_params;
4533*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 8;
4534*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
4535*4bdc9457SAndroid Build Coastguard Worker 
4536*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x4__sse;
4537*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_sse_params;
4538*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 8;
4539*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
4540*4bdc9457SAndroid Build Coastguard Worker 
4541*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__sse;
4542*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_sse_params;
4543*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 8;
4544*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
4545*4bdc9457SAndroid Build Coastguard Worker 
4546*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__sse;
4547*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_sse_params;
4548*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 8;
4549*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
4550*4bdc9457SAndroid Build Coastguard Worker     }
4551*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.avgpool = (struct avgpool_parameters) {
4552*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__sse_c4,
4553*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4,
4554*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_sse_params,
4555*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
4556*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
4557*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
4558*4bdc9457SAndroid Build Coastguard Worker     };
4559*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
4560*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__sse_c4,
4561*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4,
4562*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_sse_params,
4563*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
4564*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
4565*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
4566*4bdc9457SAndroid Build Coastguard Worker     };
4567*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
4568*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4,
4569*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4,
4570*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_sse_params,
4571*4bdc9457SAndroid Build Coastguard Worker       .update.f32 = xnn_update_f32_scaleminmax_sse_params,
4572*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
4573*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
4574*4bdc9457SAndroid Build Coastguard Worker     };
4575*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.maxpool = (struct maxpool_parameters) {
4576*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4,
4577*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_sse_params,
4578*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
4579*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
4580*4bdc9457SAndroid Build Coastguard Worker     };
4581*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {  // f32 argmax-pooling table, slot 0: unipass (.up) SSE2 kernel with mr = 4
4582*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__sse2_c4,
4583*4bdc9457SAndroid Build Coastguard Worker       .mr = 4,
4584*4bdc9457SAndroid Build Coastguard Worker     };
4585*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {  // slot 1: unipass (.up) SSE2 kernel with mr = 9
4586*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__sse2_c4,
4587*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
4588*4bdc9457SAndroid Build Coastguard Worker     };
4589*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {  // slot 2: the only entry that sets the multipass (.mp) member and a qr value
4590*4bdc9457SAndroid Build Coastguard Worker       .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4,
4591*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
4592*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
4593*4bdc9457SAndroid Build Coastguard Worker     };
4594*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.ibilinear = (struct ibilinear_parameters) {  // f32 ibilinear entry: a single SSE kernel, with no cpuinfo check around this assignment
4595*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__sse_c8,
4596*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
4597*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,  // same number as the "_c8" kernel suffix
4598*4bdc9457SAndroid Build Coastguard Worker     };
4599*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 abs kernel selection, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4600*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.abs = (struct vunary_parameters) {
4601*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__avx512f_x16,
4602*4bdc9457SAndroid Build Coastguard Worker         .init.f32_abs = xnn_init_f32_abs_avx512_params,
4603*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4604*4bdc9457SAndroid Build Coastguard Worker       };
4605*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX kernel with its own params initializer
4606*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.abs = (struct vunary_parameters) {
4607*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__avx_x16,
4608*4bdc9457SAndroid Build Coastguard Worker         .init.f32_abs = xnn_init_f32_abs_avx_params,
4609*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4610*4bdc9457SAndroid Build Coastguard Worker       };
4611*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback when neither check passes: SSE kernel, element tile 8 instead of 16
4612*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.abs = (struct vunary_parameters) {
4613*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__sse_x8,
4614*4bdc9457SAndroid Build Coastguard Worker         .init.f32_abs = xnn_init_f32_abs_sse_params,
4615*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4616*4bdc9457SAndroid Build Coastguard Worker       };
4617*4bdc9457SAndroid Build Coastguard Worker     }
4618*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 clamp kernel selection, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4619*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
4620*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__avx512f_x16,
4621*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,  // NOTE(review): scalar-named initializer on the AVX512F tier, while the AVX/SSE tiers use ISA-named ones; presumably intentional, confirm against the kernel
4622*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4623*4bdc9457SAndroid Build Coastguard Worker       };
4624*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX
4625*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
4626*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__avx_x16,
4627*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_avx_params,
4628*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4629*4bdc9457SAndroid Build Coastguard Worker       };
4630*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 8
4631*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
4632*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__sse_x8,
4633*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_sse_params,
4634*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4635*4bdc9457SAndroid Build Coastguard Worker       };
4636*4bdc9457SAndroid Build Coastguard Worker     }
4637*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 ELU kernel selection over four tiers, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4638*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
4639*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
4640*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_avx512_rr1_lut16_p3_params,  // initializer name carries the same rr1_lut16_p3 variant tag as the kernel
4641*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
4642*4bdc9457SAndroid Build Coastguard Worker       };
4643*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {  // second choice: AVX2
4644*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
4645*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
4646*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_avx2_rr1_lut4_p4_params,
4647*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 56,
4648*4bdc9457SAndroid Build Coastguard Worker       };
4649*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // third choice: AVX
4650*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
4651*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32,
4652*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_avx_rr2_lut4_p4_params,
4653*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4654*4bdc9457SAndroid Build Coastguard Worker       };
4655*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE2 kernel, element tile 12
4656*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
4657*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12,
4658*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_sse2_rr2_lut16_p3_params,
4659*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 12,
4660*4bdc9457SAndroid Build Coastguard Worker       };
4661*4bdc9457SAndroid Build Coastguard Worker     }
4662*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 hswish kernel selection over four tiers, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4663*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
4664*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__avx512f_x16,
4665*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_avx512_params,
4666*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4667*4bdc9457SAndroid Build Coastguard Worker       };
4668*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_fma3()) {  // second choice: FMA3, checked before plain AVX
4669*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
4670*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__fma3_x16,
4671*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_avx_params,  // same initializer as the AVX tier below
4672*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4673*4bdc9457SAndroid Build Coastguard Worker       };
4674*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // third choice: AVX
4675*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
4676*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__avx_x16,
4677*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_avx_params,
4678*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4679*4bdc9457SAndroid Build Coastguard Worker       };
4680*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 8
4681*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
4682*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__sse_x8,
4683*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_sse_params,
4684*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4685*4bdc9457SAndroid Build Coastguard Worker       };
4686*4bdc9457SAndroid Build Coastguard Worker     }
4687*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 leaky-ReLU kernel selection over four tiers, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4688*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
4689*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__avx512f_x16,
4690*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,  // NOTE(review): scalar-named initializer on the AVX512F tier; presumably intentional, confirm against the kernel
4691*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4692*4bdc9457SAndroid Build Coastguard Worker       };
4693*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX
4694*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
4695*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__avx_x16,
4696*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_avx_params,
4697*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4698*4bdc9457SAndroid Build Coastguard Worker       };
4699*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {  // third choice: SSE4.1 kernel
4700*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
4701*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__sse41_x8,
4702*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_sse_params,  // same initializer as the SSE fallback below
4703*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4704*4bdc9457SAndroid Build Coastguard Worker       };
4705*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 8
4706*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
4707*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__sse_x8,
4708*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_sse_params,
4709*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4710*4bdc9457SAndroid Build Coastguard Worker       };
4711*4bdc9457SAndroid Build Coastguard Worker     }
4712*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 negation kernel selection, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4713*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.neg = (struct vunary_parameters) {
4714*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__avx512f_x16,
4715*4bdc9457SAndroid Build Coastguard Worker         .init.f32_neg = xnn_init_f32_neg_avx512_params,
4716*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4717*4bdc9457SAndroid Build Coastguard Worker       };
4718*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX
4719*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.neg = (struct vunary_parameters) {
4720*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__avx_x16,
4721*4bdc9457SAndroid Build Coastguard Worker         .init.f32_neg = xnn_init_f32_neg_avx_params,
4722*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4723*4bdc9457SAndroid Build Coastguard Worker       };
4724*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 8
4725*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.neg = (struct vunary_parameters) {
4726*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__sse_x8,
4727*4bdc9457SAndroid Build Coastguard Worker         .init.f32_neg = xnn_init_f32_neg_sse_params,
4728*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4729*4bdc9457SAndroid Build Coastguard Worker       };
4730*4bdc9457SAndroid Build Coastguard Worker     }
4731*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 rounding kernels: every tier assigns all four of rndne/rndz/rndu/rndd together; first tier is AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4732*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndne = (struct vunary_parameters) {  // no .init in this tier, so the compound literal leaves that member zeroed; rndne is presumably round-to-nearest-even (TODO confirm)
4733*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__avx512f_x16,
4734*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4735*4bdc9457SAndroid Build Coastguard Worker       };
4736*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndz = (struct vunary_parameters) {  // rndz: presumably round-toward-zero (TODO confirm)
4737*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__avx512f_x16,
4738*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4739*4bdc9457SAndroid Build Coastguard Worker       };
4740*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndu = (struct vunary_parameters) {  // rndu: presumably round-up (TODO confirm)
4741*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__avx512f_x16,
4742*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4743*4bdc9457SAndroid Build Coastguard Worker       };
4744*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndd = (struct vunary_parameters) {  // rndd: presumably round-down (TODO confirm)
4745*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__avx512f_x16,
4746*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4747*4bdc9457SAndroid Build Coastguard Worker       };
4748*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX; all four entries share xnn_init_f32_rnd_avx_params
4749*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndne = (struct vunary_parameters) {
4750*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__avx_x16,
4751*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_avx_params,
4752*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4753*4bdc9457SAndroid Build Coastguard Worker       };
4754*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndz = (struct vunary_parameters) {
4755*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__avx_x16,
4756*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_avx_params,
4757*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4758*4bdc9457SAndroid Build Coastguard Worker       };
4759*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndu = (struct vunary_parameters) {
4760*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__avx_x16,
4761*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_avx_params,
4762*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4763*4bdc9457SAndroid Build Coastguard Worker       };
4764*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndd = (struct vunary_parameters) {
4765*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__avx_x16,
4766*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_avx_params,
4767*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4768*4bdc9457SAndroid Build Coastguard Worker       };
4769*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {  // third choice: SSE4.1; like the AVX512F tier, no .init is set for any of the four entries
4770*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndne = (struct vunary_parameters) {
4771*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__sse41_x8,
4772*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4773*4bdc9457SAndroid Build Coastguard Worker       };
4774*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndz = (struct vunary_parameters) {
4775*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__sse41_x8,
4776*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4777*4bdc9457SAndroid Build Coastguard Worker       };
4778*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndu = (struct vunary_parameters) {
4779*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__sse41_x8,
4780*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4781*4bdc9457SAndroid Build Coastguard Worker       };
4782*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndd = (struct vunary_parameters) {
4783*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__sse41_x8,
4784*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4785*4bdc9457SAndroid Build Coastguard Worker       };
4786*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE2 kernels; all four entries share xnn_init_f32_rnd_sse2_params
4787*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndne = (struct vunary_parameters) {
4788*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__sse2_x8,
4789*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_sse2_params,
4790*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4791*4bdc9457SAndroid Build Coastguard Worker       };
4792*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndz = (struct vunary_parameters) {
4793*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__sse2_x8,
4794*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_sse2_params,
4795*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4796*4bdc9457SAndroid Build Coastguard Worker       };
4797*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndu = (struct vunary_parameters) {
4798*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__sse2_x8,
4799*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_sse2_params,
4800*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4801*4bdc9457SAndroid Build Coastguard Worker       };
4802*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rndd = (struct vunary_parameters) {
4803*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__sse2_x8,
4804*4bdc9457SAndroid Build Coastguard Worker         .init.f32_rnd = xnn_init_f32_rnd_sse2_params,
4805*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4806*4bdc9457SAndroid Build Coastguard Worker       };
4807*4bdc9457SAndroid Build Coastguard Worker     }
4808*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 sigmoid kernel selection over five tiers, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4809*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
4810*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
4811*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params,
4812*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
4813*4bdc9457SAndroid Build Coastguard Worker       };
4814*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {  // second choice: AVX2
4815*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
4816*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40,
4817*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_avx2_rr1_p5_params,
4818*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 40,
4819*4bdc9457SAndroid Build Coastguard Worker       };
4820*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // third choice: AVX; same element tile (40) as the AVX2 tier
4821*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
4822*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
4823*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_avx_rr2_p5_params,
4824*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 40,
4825*4bdc9457SAndroid Build Coastguard Worker       };
4826*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {  // fourth choice: SSE4.1 kernel
4827*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
4828*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
4829*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,  // same sse2-named initializer as the SSE2 fallback below
4830*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4831*4bdc9457SAndroid Build Coastguard Worker       };
4832*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE2 kernel, element tile 8
4833*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sigmoid = (struct vunary_parameters) {
4834*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
4835*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sigmoid = xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params,
4836*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4837*4bdc9457SAndroid Build Coastguard Worker       };
4838*4bdc9457SAndroid Build Coastguard Worker     }
4839*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 square kernel selection, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4840*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqr = (struct vunary_parameters) {  // no .init here, so the compound literal leaves that member zeroed
4841*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__avx512f_x16,
4842*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4843*4bdc9457SAndroid Build Coastguard Worker       };
4844*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX
4845*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqr = (struct vunary_parameters) {
4846*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__avx_x16,
4847*4bdc9457SAndroid Build Coastguard Worker         .init.f32_default = xnn_init_f32_default_avx_params,  // the only tier of this operator that sets a params initializer
4848*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4849*4bdc9457SAndroid Build Coastguard Worker       };
4850*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 8, again without .init
4851*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqr = (struct vunary_parameters) {
4852*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__sse_x8,
4853*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4854*4bdc9457SAndroid Build Coastguard Worker       };
4855*4bdc9457SAndroid Build Coastguard Worker     }
4856*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx()) {  // f32 square-root kernel selection: only two tiers, and unlike the neighbouring operators there is no AVX512F check
4857*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqrt = (struct vunary_parameters) {
4858*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__avx_sqrt_x8,
4859*4bdc9457SAndroid Build Coastguard Worker         .init.f32_sqrt = xnn_init_f32_sqrt_avx_params,
4860*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
4861*4bdc9457SAndroid Build Coastguard Worker       };
4862*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE kernel, element tile 4
4863*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.sqrt = (struct vunary_parameters) {  // no .init here, so the compound literal leaves that member zeroed
4864*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__sse_sqrt_x4,
4865*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
4866*4bdc9457SAndroid Build Coastguard Worker       };
4867*4bdc9457SAndroid Build Coastguard Worker     }
4868*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {  // f32 PReLU kernel selection over four tiers, first match wins: AVX512F (never taken when XNN_PLATFORM_MOBILE is nonzero)
4869*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {  // every tier sets only the kernel, row_tile and channel_tile
4870*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__avx512f_2x16,
4871*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,  // row_tile x channel_tile mirrors the "2x16" kernel suffix
4872*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 16,
4873*4bdc9457SAndroid Build Coastguard Worker       };
4874*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {  // second choice: AVX
4875*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
4876*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__avx_2x16,
4877*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
4878*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 16,
4879*4bdc9457SAndroid Build Coastguard Worker       };
4880*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {  // third choice: SSE4.1, channel tile drops to 8
4881*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
4882*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse41_2x8,
4883*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
4884*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4885*4bdc9457SAndroid Build Coastguard Worker       };
4886*4bdc9457SAndroid Build Coastguard Worker     } else {  // fallback: SSE2 kernel with the same 2x8 tiling
4887*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
4888*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
4889*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
4890*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
4891*4bdc9457SAndroid Build Coastguard Worker       };
4892*4bdc9457SAndroid Build Coastguard Worker     }
4893*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {  // f32 raddstoreexpminusmax entry: a single SSE2 kernel, with no cpuinfo check around this assignment
4894*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__sse2_rr2_p5_x20_acc2,
4895*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_expminus_sse2_rr2_p5_params,
4896*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 20,  // same number as the "_x20" in the kernel name
4897*4bdc9457SAndroid Build Coastguard Worker     };
4898*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__sse;  // rmax is stored as a bare kernel pointer, not a parameters struct
4899*4bdc9457SAndroid Build Coastguard Worker     if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
4900*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
4901*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__avx512f_x32,
4902*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__avx512f_x32,
4903*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__avx512f_x32,
4904*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
4905*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4906*4bdc9457SAndroid Build Coastguard Worker       };
4907*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
4908*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__avx512f_x32,
4909*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__avx512f_x32,
4910*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__avx512f_x32,
4911*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
4912*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4913*4bdc9457SAndroid Build Coastguard Worker       };
4914*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
4915*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__avx512f_x32,
4916*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx512f_x32,
4917*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx512f_x32,
4918*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4919*4bdc9457SAndroid Build Coastguard Worker       };
4920*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
4921*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__avx512f_x32,
4922*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx512f_x32,
4923*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx512f_x32,
4924*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4925*4bdc9457SAndroid Build Coastguard Worker       };
4926*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
4927*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__avx512f_x32,
4928*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__avx512f_x32,
4929*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__avx512f_x32,
4930*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
4931*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4932*4bdc9457SAndroid Build Coastguard Worker       };
4933*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
4934*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__avx512f_x32,
4935*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__avx512f_x32,
4936*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__avx512f_x32,
4937*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
4938*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4939*4bdc9457SAndroid Build Coastguard Worker       };
4940*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
4941*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__avx512f_x32,
4942*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__avx512f_x32,
4943*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__avx512f_x32,
4944*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
4945*4bdc9457SAndroid Build Coastguard Worker       };
4946*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
4947*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
4948*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__avx_x16,
4949*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__avx_x16,
4950*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__avx_x16,
4951*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_avx_params,
4952*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4953*4bdc9457SAndroid Build Coastguard Worker       };
4954*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
4955*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__avx_x16,
4956*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__avx_x16,
4957*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__avx_x16,
4958*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_avx_params,
4959*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4960*4bdc9457SAndroid Build Coastguard Worker       };
4961*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
4962*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__avx_x16,
4963*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx_x16,
4964*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__avx_x16,
4965*4bdc9457SAndroid Build Coastguard Worker         .init.f32_default = xnn_init_f32_default_avx_params,
4966*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4967*4bdc9457SAndroid Build Coastguard Worker       };
4968*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
4969*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__avx_x16,
4970*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx_x16,
4971*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__avx_x16,
4972*4bdc9457SAndroid Build Coastguard Worker         .init.f32_default = xnn_init_f32_default_avx_params,
4973*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4974*4bdc9457SAndroid Build Coastguard Worker       };
4975*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
4976*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__avx_x16,
4977*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__avx_x16,
4978*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__avx_x16,
4979*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_avx_params,
4980*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4981*4bdc9457SAndroid Build Coastguard Worker       };
4982*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
4983*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__avx_x16,
4984*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__avx_x16,
4985*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__avx_x16,
4986*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_avx_params,
4987*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4988*4bdc9457SAndroid Build Coastguard Worker       };
4989*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
4990*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__avx_x16,
4991*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__avx_x16,
4992*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__avx_x16,
4993*4bdc9457SAndroid Build Coastguard Worker         .init.f32_default = xnn_init_f32_default_avx_params,
4994*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
4995*4bdc9457SAndroid Build Coastguard Worker       };
4996*4bdc9457SAndroid Build Coastguard Worker     } else {
4997*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
4998*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__sse_x8,
4999*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__sse_x8,
5000*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__sse_x8,
5001*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_sse_params,
5002*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5003*4bdc9457SAndroid Build Coastguard Worker       };
5004*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
5005*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__sse_x8,
5006*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__sse_x8,
5007*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__sse_x8,
5008*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_sse_params,
5009*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5010*4bdc9457SAndroid Build Coastguard Worker       };
5011*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
5012*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__sse_x8,
5013*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
5014*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
5015*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5016*4bdc9457SAndroid Build Coastguard Worker       };
5017*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
5018*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__sse_x8,
5019*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__sse_x8,
5020*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__sse_x8,
5021*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5022*4bdc9457SAndroid Build Coastguard Worker       };
5023*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
5024*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__sse_x8,
5025*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__sse_x8,
5026*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__sse_x8,
5027*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_sse_params,
5028*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5029*4bdc9457SAndroid Build Coastguard Worker       };
5030*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
5031*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__sse_x8,
5032*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__sse_x8,
5033*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__sse_x8,
5034*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_sse_params,
5035*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5036*4bdc9457SAndroid Build Coastguard Worker       };
5037*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
5038*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__sse_x8,
5039*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__sse_x8,
5040*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__sse_x8,
5041*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5042*4bdc9457SAndroid Build Coastguard Worker       };
5043*4bdc9457SAndroid Build Coastguard Worker     }
5044*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {  // Assigned after the preceding if/else chain has closed, so that chain's CPU checks do not select this entry.
5045*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x,
5046*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_sse_params,  // Params initializer stored next to the SSE kernel.
5047*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,  // Agrees with the "c4" in the kernel name.
5048*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 2,  // Agrees with the "2x" in the kernel name.
5049*4bdc9457SAndroid Build Coastguard Worker     };
5050*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_NCHW_OPERATORS  // Everything down to the matching #endif is compiled out when XNN_NO_NCHW_OPERATORS is defined.
5051*4bdc9457SAndroid Build Coastguard Worker       // Sparse microkernels on x86 currently target only SSE, and on processors
5052*4bdc9457SAndroid Build Coastguard Worker       // with AVX ISA dense inference is expected to be faster than sparse.
5053*4bdc9457SAndroid Build Coastguard Worker       if (!cpuinfo_has_x86_avx()) {
5054*4bdc9457SAndroid Build Coastguard Worker         init_flags |= XNN_INIT_FLAG_CHW_OPT;  // Only this flag depends on the AVX check; the table entries below are filled in either way.
5055*4bdc9457SAndroid Build Coastguard Worker       }
5056*4bdc9457SAndroid Build Coastguard Worker 
5057*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm = (struct spmm_parameters) {
5058*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x1__sse,
5059*4bdc9457SAndroid Build Coastguard Worker         .mr = 32,  // mr/nr agree with the "32x1" in the kernel name.
5060*4bdc9457SAndroid Build Coastguard Worker         .nr = 1,
5061*4bdc9457SAndroid Build Coastguard Worker       };
5062*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
5063*4bdc9457SAndroid Build Coastguard Worker         .ukernel_with_symm_padding =
5064*4bdc9457SAndroid Build Coastguard Worker           (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2,
5065*4bdc9457SAndroid Build Coastguard Worker         .output_channel_tile = 4,
5066*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
5067*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 2,
5068*4bdc9457SAndroid Build Coastguard Worker       };
5069*4bdc9457SAndroid Build Coastguard Worker       if (cpuinfo_has_x86_ssse3()) {  // dwconv2d_chw_3x3: SSSE3 kernel when the CPU reports SSSE3, otherwise the SSE kernel; both branches use the same tile sizes.
5070*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
5071*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2,
5072*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
5073*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
5074*4bdc9457SAndroid Build Coastguard Worker         };
5075*4bdc9457SAndroid Build Coastguard Worker       } else {
5076*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
5077*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2,
5078*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
5079*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
5080*4bdc9457SAndroid Build Coastguard Worker         };
5081*4bdc9457SAndroid Build Coastguard Worker       }
5082*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {  // The remaining entries in this section each name a single SSE kernel, with no further CPU checks.
5083*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_1x4_acc3,
5084*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
5085*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
5086*4bdc9457SAndroid Build Coastguard Worker       };
5087*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
5088*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4,
5089*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
5090*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 4,
5091*4bdc9457SAndroid Build Coastguard Worker       };
5092*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
5093*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_2x4,
5094*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 4,
5095*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
5096*4bdc9457SAndroid Build Coastguard Worker       };
5097*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
5098*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__sse_x4,
5099*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5100*4bdc9457SAndroid Build Coastguard Worker       };
5101*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
5102*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__sse_p8,
5103*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
5104*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 8,  // Agrees with the "p8" in the kernel name.
5105*4bdc9457SAndroid Build Coastguard Worker       };
5106*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_NCHW_OPERATORS
5107*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F32_OPERATORS
5108*4bdc9457SAndroid Build Coastguard Worker 
5109*4bdc9457SAndroid Build Coastguard Worker   /*************************** VCVT x86 micro-kernels ***************************/
5110*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_VCVT_OPERATORS
5111*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_VCVT;
5112*4bdc9457SAndroid Build Coastguard Worker 
5113*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // vcvt.f16_to_f32 / vcvt.f32_to_f16, first match wins: AVX512 (F+BW+DQ+VL all required) > F16C > AVX > SSE4.1 > sse2-named fallback.
5114*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {  // No .init member is named; members not listed in a compound literal are zero-initialized.
5115*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__avx512skx_x16,
5116*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5117*4bdc9457SAndroid Build Coastguard Worker       };
5118*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {  // Likewise no .init member named.
5119*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__avx512skx_x16,
5120*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5121*4bdc9457SAndroid Build Coastguard Worker       };
5122*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_f16c()) {
5123*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {  // No .init member named in this direction; the f32_to_f16 entry below does set one.
5124*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__f16c_x16,
5125*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5126*4bdc9457SAndroid Build Coastguard Worker       };
5127*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
5128*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__f16c_x16,
5129*4bdc9457SAndroid Build Coastguard Worker         .init.f32_f16_cvt = xnn_init_f32_f16_cvt_f16c_params,
5130*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5131*4bdc9457SAndroid Build Coastguard Worker       };
5132*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
5133*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
5134*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__avx_int16_x16,
5135*4bdc9457SAndroid Build Coastguard Worker         .init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params,  // AVX kernel paired with the sse_int16 params initializer (same initializer as the SSE4.1 and fallback branches).
5136*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5137*4bdc9457SAndroid Build Coastguard Worker       };
5138*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
5139*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__avx_x24,
5140*4bdc9457SAndroid Build Coastguard Worker         .init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params,  // AVX kernel paired with the sse2 params initializer (same initializer as the SSE4.1 and fallback branches).
5141*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 24,  // 24 here versus 16 for f16_to_f32 in this branch; each agrees with its kernel's _xN suffix.
5142*4bdc9457SAndroid Build Coastguard Worker       };
5143*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
5144*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
5145*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__sse41_int16_x16,
5146*4bdc9457SAndroid Build Coastguard Worker         .init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params,
5147*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5148*4bdc9457SAndroid Build Coastguard Worker       };
5149*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
5150*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__sse41_x8,
5151*4bdc9457SAndroid Build Coastguard Worker         .init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params,
5152*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5153*4bdc9457SAndroid Build Coastguard Worker       };
5154*4bdc9457SAndroid Build Coastguard Worker     } else {  // Taken when none of the checks above pass; both kernels carry the sse2 name.
5155*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
5156*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__sse2_int16_x32,
5157*4bdc9457SAndroid Build Coastguard Worker         .init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params,
5158*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5159*4bdc9457SAndroid Build Coastguard Worker       };
5160*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
5161*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__sse2_x16,
5162*4bdc9457SAndroid Build Coastguard Worker         .init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params,
5163*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5164*4bdc9457SAndroid Build Coastguard Worker       };
5165*4bdc9457SAndroid Build Coastguard Worker     }
5166*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // vcvt.f32_to_qs8, first match wins: AVX512 (F+BW+DQ+VL all required) > AVX2 > AVX > SSE4.1 > sse2-named fallback.
5167*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
5168*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__avx512skx_x128,
5169*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx512_params,  // Every branch in this chain stores a params initializer whose name matches the branch's ISA.
5170*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 128,  // In every branch element_tile agrees with the kernel's _xN suffix.
5171*4bdc9457SAndroid Build Coastguard Worker       };
5172*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
5173*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
5174*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__avx2_x64,
5175*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx2_params,
5176*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
5177*4bdc9457SAndroid Build Coastguard Worker       };
5178*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
5179*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
5180*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__avx_x32,
5181*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx_params,
5182*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5183*4bdc9457SAndroid Build Coastguard Worker       };
5184*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
5185*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
5186*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__sse41_x32,
5187*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_sse4_params,
5188*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5189*4bdc9457SAndroid Build Coastguard Worker       };
5190*4bdc9457SAndroid Build Coastguard Worker     } else {  // Taken when none of the checks above pass.
5191*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
5192*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__sse2_x32,
5193*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_sse2_params,
5194*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5195*4bdc9457SAndroid Build Coastguard Worker       };
5196*4bdc9457SAndroid Build Coastguard Worker     }
5197*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {  // vcvt.f32_to_qu8, first match wins: AVX512 (F+BW+DQ+VL all required) > AVX2 > AVX > sse2-named fallback; this chain has no SSE4.1 branch.
5198*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
5199*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__avx512skx_x128,
5200*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx512_params,  // Every branch in this chain stores a params initializer whose name matches the branch's ISA.
5201*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 128,  // In every branch element_tile agrees with the kernel's _xN suffix.
5202*4bdc9457SAndroid Build Coastguard Worker       };
5203*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
5204*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
5205*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__avx2_x64,
5206*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx2_params,
5207*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 64,
5208*4bdc9457SAndroid Build Coastguard Worker       };
5209*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
5210*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
5211*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__avx_x32,
5212*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx_params,
5213*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5214*4bdc9457SAndroid Build Coastguard Worker       };
5215*4bdc9457SAndroid Build Coastguard Worker     } else {  // Taken whenever AVX is not reported, including CPUs that report SSE4.1.
5216*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
5217*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__sse2_x32,
5218*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_sse2_params,
5219*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5220*4bdc9457SAndroid Build Coastguard Worker       };
5221*4bdc9457SAndroid Build Coastguard Worker     }
5222*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx2()) {  // vcvt.qs8 and vcvt.qu8 are set together in each branch, first match wins: AVX2 > AVX > SSE4.1 > SSSE3 > sse2-named fallback. There is no AVX512 branch; every kernel here has element_tile 32.
5223*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
5224*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__avx2_x32,
5225*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_avx2_params,
5226*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5227*4bdc9457SAndroid Build Coastguard Worker       };
5228*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
5229*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__avx2_x32,
5230*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_avx2_params,
5231*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5232*4bdc9457SAndroid Build Coastguard Worker       };
5233*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
5234*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
5235*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__avx_x32,
5236*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params,  // AVX kernel paired with the ssse3 params initializer.
5237*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5238*4bdc9457SAndroid Build Coastguard Worker       };
5239*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
5240*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__avx_x32,
5241*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params,  // AVX kernel paired with the ssse3 params initializer.
5242*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5243*4bdc9457SAndroid Build Coastguard Worker       };
5244*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
5245*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
5246*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__sse41_x32,
5247*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params,  // SSE4.1 kernel paired with the ssse3 params initializer.
5248*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5249*4bdc9457SAndroid Build Coastguard Worker       };
5250*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
5251*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__sse41_x32,
5252*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params,  // SSE4.1 kernel paired with the ssse3 params initializer.
5253*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5254*4bdc9457SAndroid Build Coastguard Worker       };
5255*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_ssse3()) {
5256*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
5257*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__ssse3_x32,
5258*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params,
5259*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5260*4bdc9457SAndroid Build Coastguard Worker       };
5261*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
5262*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__ssse3_x32,
5263*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params,
5264*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5265*4bdc9457SAndroid Build Coastguard Worker       };
5266*4bdc9457SAndroid Build Coastguard Worker     } else {  // Taken when none of the checks above pass; the only branch that uses the sse2 params initializers.
5267*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
5268*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__sse2_x32,
5269*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_sse2_params,
5270*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5271*4bdc9457SAndroid Build Coastguard Worker       };
5272*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
5273*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__sse2_x32,
5274*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_sse2_params,
5275*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5276*4bdc9457SAndroid Build Coastguard Worker       };
5277*4bdc9457SAndroid Build Coastguard Worker     }
5278*4bdc9457SAndroid Build Coastguard Worker     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
5279*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
5280*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__avx512skx_x32,
5281*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx512_params,
5282*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5283*4bdc9457SAndroid Build Coastguard Worker       };
5284*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
5285*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__avx512skx_x32,
5286*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx512_params,
5287*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5288*4bdc9457SAndroid Build Coastguard Worker       };
5289*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx2()) {
5290*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
5291*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__avx2_x16,
5292*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx_params,
5293*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5294*4bdc9457SAndroid Build Coastguard Worker       };
5295*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
5296*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__avx2_x16,
5297*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx_params,
5298*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5299*4bdc9457SAndroid Build Coastguard Worker       };
5300*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_avx()) {
5301*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
5302*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__avx_x32,
5303*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx_params,
5304*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5305*4bdc9457SAndroid Build Coastguard Worker       };
5306*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
5307*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__avx_x32,
5308*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx_params,
5309*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5310*4bdc9457SAndroid Build Coastguard Worker       };
5311*4bdc9457SAndroid Build Coastguard Worker     } else if (cpuinfo_has_x86_sse4_1()) {
5312*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
5313*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__sse41_x16,
5314*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_sse4_params,
5315*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5316*4bdc9457SAndroid Build Coastguard Worker       };
5317*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
5318*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__sse41_x16,
5319*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_sse4_params,
5320*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
5321*4bdc9457SAndroid Build Coastguard Worker       };
5322*4bdc9457SAndroid Build Coastguard Worker     } else {
5323*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
5324*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__sse2_x32,
5325*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_sse2_params,
5326*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5327*4bdc9457SAndroid Build Coastguard Worker       };
5328*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
5329*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__sse2_x32,
5330*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_sse2_params,
5331*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
5332*4bdc9457SAndroid Build Coastguard Worker       };
5333*4bdc9457SAndroid Build Coastguard Worker     }
5334*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_VCVT_OPERATORS
5335*4bdc9457SAndroid Build Coastguard Worker 
5336*4bdc9457SAndroid Build Coastguard Worker   /**************************** X32 x86 micro-kernels ****************************/
5337*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X32_OPERATORS
5338*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X32;
5339*4bdc9457SAndroid Build Coastguard Worker 
5340*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__sse2;
5341*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.zip = (struct zip_parameters) {
5342*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
5343*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
5344*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
5345*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
5346*4bdc9457SAndroid Build Coastguard Worker     };
5347*4bdc9457SAndroid Build Coastguard Worker 
5348*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.transpose = (struct transpose_parameters) {
5349*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__4x4_sse,
5350*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
5351*4bdc9457SAndroid Build Coastguard Worker     };
5352*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X32_OPERATORS
5353*4bdc9457SAndroid Build Coastguard Worker 
5354*4bdc9457SAndroid Build Coastguard Worker   /**************************** XX x86 micro-kernels ****************************/
5355*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_XX_OPERATORS
5356*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_XX;
5357*4bdc9457SAndroid Build Coastguard Worker 
5358*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
5359*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.fill = (struct fill_parameters) {
5360*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__sse2_x64,
5361*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
5362*4bdc9457SAndroid Build Coastguard Worker     };
5363*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.pad = (struct pad_parameters) {
5364*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__sse2,
5365*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
5366*4bdc9457SAndroid Build Coastguard Worker     };
5367*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.transpose = (struct transpose_parameters) {
5368*4bdc9457SAndroid Build Coastguard Worker       .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
5369*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
5370*4bdc9457SAndroid Build Coastguard Worker     };
5371*4bdc9457SAndroid Build Coastguard Worker   #endif
5372*4bdc9457SAndroid Build Coastguard Worker 
5373*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5374*4bdc9457SAndroid Build Coastguard Worker 
5375*4bdc9457SAndroid Build Coastguard Worker   /**************************** QC8 WAsm SIMD micro-kernels ****************************/
5376*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
5377*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QC8;
5378*4bdc9457SAndroid Build Coastguard Worker 
5379*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5380*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5381*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5382*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5383*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
5384*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.mr = 4;
5385*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.nr = 4;
5386*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.log2_kr = 1;
5387*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.log2_sr = 2;
5388*4bdc9457SAndroid Build Coastguard Worker 
5389*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16;
5390*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
5391*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].channel_tile = 16;
5392*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].primary_tile = 3;
5393*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16;
5394*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
5395*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].channel_tile = 16;
5396*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].primary_tile = 9;
5397*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16;
5398*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_wasmsimd_params;
5399*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].channel_tile = 16;
5400*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].primary_tile = 25;
5401*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS (NOTE: the QC8 section is gated on the QS8 macro, not XNN_NO_QC8_OPERATORS)
5402*4bdc9457SAndroid Build Coastguard Worker 
5403*4bdc9457SAndroid Build Coastguard Worker   /**************************** QS8 WAsm SIMD micro-kernels ****************************/
5404*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
5405*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QS8;
5406*4bdc9457SAndroid Build Coastguard Worker 
5407*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5408*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5409*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5410*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5411*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_wasmsimd_params;
5412*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.mr = 4;
5413*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.nr = 4;
5414*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.log2_kr = 1;
5415*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.log2_sr = 2;
5416*4bdc9457SAndroid Build Coastguard Worker 
5417*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16;
5418*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_wasmsimd_params;
5419*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].channel_tile = 16;
5420*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].primary_tile = 9;
5421*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16_add16;
5422*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_wasmsimd_params;
5423*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].channel_tile = 16;
5424*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].primary_tile = 25;
5425*4bdc9457SAndroid Build Coastguard Worker 
5426*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
5427*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c16,
5428*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c16,
5429*4bdc9457SAndroid Build Coastguard Worker       .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_wasmsimd_params,
5430*4bdc9457SAndroid Build Coastguard Worker       .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_wasmsimd_params,
5431*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
5432*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 16,
5433*4bdc9457SAndroid Build Coastguard Worker     };
5434*4bdc9457SAndroid Build Coastguard Worker 
5435*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vadd = (struct vbinary_parameters) {
5436*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
5437*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32,
5438*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32,
5439*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_add = xnn_init_qs8_add_minmax_wasmsimd_params,
5440*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
5441*4bdc9457SAndroid Build Coastguard Worker     };
5442*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vmul = (struct vbinary_parameters) {
5443*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5444*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5445*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5446*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
5447*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5448*4bdc9457SAndroid Build Coastguard Worker     };
5449*4bdc9457SAndroid Build Coastguard Worker 
5450*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
5451*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
5452*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qs8.lrelu = (struct vunary_parameters) {
5453*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__wasmrelaxedsimd_x86_x32,
5454*4bdc9457SAndroid Build Coastguard Worker           .init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_x86_params,
5455*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5456*4bdc9457SAndroid Build Coastguard Worker         };
5457*4bdc9457SAndroid Build Coastguard Worker       } else {
5458*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qs8.lrelu = (struct vunary_parameters) {
5459*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__wasmrelaxedsimd_arm_x32,
5460*4bdc9457SAndroid Build Coastguard Worker           .init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_arm_params,
5461*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5462*4bdc9457SAndroid Build Coastguard Worker         };
5463*4bdc9457SAndroid Build Coastguard Worker       }
5464*4bdc9457SAndroid Build Coastguard Worker     #else
5465*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
5466*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qs8.lrelu = (struct vunary_parameters) {
5467*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__wasmsimd_x86_x16,
5468*4bdc9457SAndroid Build Coastguard Worker           .init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_x86_params,
5469*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
5470*4bdc9457SAndroid Build Coastguard Worker         };
5471*4bdc9457SAndroid Build Coastguard Worker       } else {
5472*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qs8.lrelu = (struct vunary_parameters) {
5473*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__wasmsimd_arm_x32,
5474*4bdc9457SAndroid Build Coastguard Worker           .init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_arm_params,
5475*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5476*4bdc9457SAndroid Build Coastguard Worker         };
5477*4bdc9457SAndroid Build Coastguard Worker       }
5478*4bdc9457SAndroid Build Coastguard Worker     #endif
5479*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS
5480*4bdc9457SAndroid Build Coastguard Worker 
5481*4bdc9457SAndroid Build Coastguard Worker   /**************************** QU8 WAsm SIMD micro-kernels ****************************/
5482*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QU8_OPERATORS
5483*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QU8;
5484*4bdc9457SAndroid Build Coastguard Worker 
5485*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5486*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128);
5487*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5488*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128);
5489*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_wasmsimd_params;
5490*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.mr = 4;
5491*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.nr = 4;
5492*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.log2_kr = 1;
5493*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.log2_sr = 2;
5494*4bdc9457SAndroid Build Coastguard Worker 
5495*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16;
5496*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_wasmsimd_params;
5497*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].channel_tile = 8;
5498*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].primary_tile = 9;
5499*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16;
5500*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_wasmsimd_params;
5501*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].channel_tile = 8;
5502*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].primary_tile = 25;
5503*4bdc9457SAndroid Build Coastguard Worker 
5504*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.avgpool = (struct avgpool_parameters) {
5505*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1,
5506*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1,
5507*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_scalar_params,
5508*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
5509*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
5510*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
5511*4bdc9457SAndroid Build Coastguard Worker     };
5512*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
5513*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c16,
5514*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c16,
5515*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_wasmsimd_params,
5516*4bdc9457SAndroid Build Coastguard Worker       .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_wasmsimd_params,
5517*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
5518*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 16,
5519*4bdc9457SAndroid Build Coastguard Worker     };
5520*4bdc9457SAndroid Build Coastguard Worker 
5521*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vadd = (struct vbinary_parameters) {
5522*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32,
5523*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
5524*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
5525*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_add = xnn_init_qu8_add_minmax_wasmsimd_params,
5526*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
5527*4bdc9457SAndroid Build Coastguard Worker     };
5528*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vmul = (struct vbinary_parameters) {
5529*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5530*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5531*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
5532*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_wasmsimd_params,
5533*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5534*4bdc9457SAndroid Build Coastguard Worker     };
5535*4bdc9457SAndroid Build Coastguard Worker 
5536*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
5537*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
5538*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.lrelu = (struct vunary_parameters) {
5539*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__wasmrelaxedsimd_x86_x32,
5540*4bdc9457SAndroid Build Coastguard Worker           .init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_x86_params,
5541*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5542*4bdc9457SAndroid Build Coastguard Worker         };
5543*4bdc9457SAndroid Build Coastguard Worker       } else {
5544*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.lrelu = (struct vunary_parameters) {
5545*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__wasmrelaxedsimd_arm_x32,
5546*4bdc9457SAndroid Build Coastguard Worker           .init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_arm_params,
5547*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5548*4bdc9457SAndroid Build Coastguard Worker         };
5549*4bdc9457SAndroid Build Coastguard Worker       }
5550*4bdc9457SAndroid Build Coastguard Worker     #else
5551*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
5552*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.lrelu = (struct vunary_parameters) {
5553*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__wasmsimd_x86_x16,
5554*4bdc9457SAndroid Build Coastguard Worker           .init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_x86_params,
5555*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 16,
5556*4bdc9457SAndroid Build Coastguard Worker         };
5557*4bdc9457SAndroid Build Coastguard Worker       } else {
5558*4bdc9457SAndroid Build Coastguard Worker         xnn_params.qu8.lrelu = (struct vunary_parameters) {
5559*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__wasmsimd_arm_x32,
5560*4bdc9457SAndroid Build Coastguard Worker           .init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_arm_params,
5561*4bdc9457SAndroid Build Coastguard Worker           .element_tile = 32,
5562*4bdc9457SAndroid Build Coastguard Worker         };
5563*4bdc9457SAndroid Build Coastguard Worker       }
5564*4bdc9457SAndroid Build Coastguard Worker     #endif
5565*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QU8_OPERATORS
5566*4bdc9457SAndroid Build Coastguard Worker 
5567*4bdc9457SAndroid Build Coastguard Worker   /**************************** S8 WAsm SIMD micro-kernels ****************************/
5568*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_S8_OPERATORS
5569*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_S8;
5570*4bdc9457SAndroid Build Coastguard Worker 
5571*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.clamp = (struct vunary_parameters) {
5572*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__wasmsimd_x64,
5573*4bdc9457SAndroid Build Coastguard Worker       .init.s8_minmax = xnn_init_s8_minmax_wasmsimd_params,
5574*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 64,
5575*4bdc9457SAndroid Build Coastguard Worker     };
5576*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
5577*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8,
5578*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
5579*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
5580*4bdc9457SAndroid Build Coastguard Worker     };
5581*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.maxpool = (struct maxpool_parameters) {
5582*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__wasmsimd_c16,
5583*4bdc9457SAndroid Build Coastguard Worker       .init.s8 = xnn_init_s8_minmax_wasmsimd_params,
5584*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
5585*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
5586*4bdc9457SAndroid Build Coastguard Worker     };
5587*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_S8_OPERATORS
5588*4bdc9457SAndroid Build Coastguard Worker 
5589*4bdc9457SAndroid Build Coastguard Worker   /**************************** U8 WAsm SIMD micro-kernels ****************************/
5590*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_U8_OPERATORS
5591*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_U8;
5592*4bdc9457SAndroid Build Coastguard Worker 
5593*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.clamp = (struct vunary_parameters) {
5594*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__wasmsimd_x64,
5595*4bdc9457SAndroid Build Coastguard Worker       .init.u8_minmax = xnn_init_u8_minmax_wasmsimd_params,
5596*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 64,
5597*4bdc9457SAndroid Build Coastguard Worker     };
5598*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
5599*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8,
5600*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
5601*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
5602*4bdc9457SAndroid Build Coastguard Worker     };
5603*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.maxpool = (struct maxpool_parameters) {
5604*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__wasmsimd_c16,
5605*4bdc9457SAndroid Build Coastguard Worker       .init.u8 = xnn_init_u8_minmax_wasmsimd_params,
5606*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
5607*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
5608*4bdc9457SAndroid Build Coastguard Worker     };
5609*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
5610*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
5611*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_U8_OPERATORS
5612*4bdc9457SAndroid Build Coastguard Worker 
5613*4bdc9457SAndroid Build Coastguard Worker   /**************************** X8 WAsm SIMD micro-kernels ****************************/
5614*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X8_OPERATORS
5615*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X8;
5616*4bdc9457SAndroid Build Coastguard Worker 
5617*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;
5618*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.zip = (struct zip_parameters) {
5619*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
5620*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
5621*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
5622*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
5623*4bdc9457SAndroid Build Coastguard Worker     };
5624*4bdc9457SAndroid Build Coastguard Worker 
5625*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.transpose = (struct transpose_parameters) {
5626*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__2x4_scalar_int,
5627*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
5628*4bdc9457SAndroid Build Coastguard Worker     };
5629*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X8_OPERATORS
5630*4bdc9457SAndroid Build Coastguard Worker 
5631*4bdc9457SAndroid Build Coastguard Worker   /**************************** X16 WAsm SIMD micro-kernels ****************************/
5632*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X16_OPERATORS
5633*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X16;
5634*4bdc9457SAndroid Build Coastguard Worker 
5635*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x16.transpose = (struct transpose_parameters) {
5636*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__2x4_scalar_int,
5637*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
5638*4bdc9457SAndroid Build Coastguard Worker     };
5639*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X16_OPERATORS
5640*4bdc9457SAndroid Build Coastguard Worker 
5641*4bdc9457SAndroid Build Coastguard Worker   /**************************** F32 WAsm SIMD micro-kernels ****************************/
5642*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F32_OPERATORS
5643*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_F32;
5644*4bdc9457SAndroid Build Coastguard Worker 
5645*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5646*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_WASMRELAXEDSIMD
5647*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5648*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5649*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5650*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5651*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5652*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5653*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5654*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5655*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5656*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
5657*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5658*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
5659*4bdc9457SAndroid Build Coastguard Worker       #else
5660*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat);
5661*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat);
5662*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat);
5663*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat);
5664*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
5665*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
5666*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
5667*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
5668*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
5669*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
5670*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
5671*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
5672*4bdc9457SAndroid Build Coastguard Worker       #endif
5673*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5674*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 4;
5675*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 8;
5676*4bdc9457SAndroid Build Coastguard Worker 
5677*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_WASMRELAXEDSIMD
5678*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma);
5679*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma);
5680*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
5681*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
5682*4bdc9457SAndroid Build Coastguard Worker       #else
5683*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86);
5684*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86);
5685*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
5686*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
5687*4bdc9457SAndroid Build Coastguard Worker       #endif
5688*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5689*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.mr = 4;
5690*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.nr = 2;
5691*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.log2_kr = 2;
5692*4bdc9457SAndroid Build Coastguard Worker     } else {
5693*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_WASMRELAXEDSIMD
5694*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5695*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5696*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5697*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5698*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5699*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5700*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5701*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5702*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5703*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
5704*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5705*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
5706*4bdc9457SAndroid Build Coastguard Worker       #else
5707*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat);
5708*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat);
5709*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat);
5710*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat);
5711*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
5712*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
5713*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
5714*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
5715*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
5716*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
5717*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
5718*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
5719*4bdc9457SAndroid Build Coastguard Worker       #endif
5720*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5721*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 5;
5722*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 8;
5723*4bdc9457SAndroid Build Coastguard Worker 
5724*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_WASMRELAXEDSIMD
5725*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma);
5726*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma);
5727*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
5728*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
5729*4bdc9457SAndroid Build Coastguard Worker       #else
5730*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm);
5731*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm);
5732*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
5733*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
5734*4bdc9457SAndroid Build Coastguard Worker       #endif
5735*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5736*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.mr = 4;
5737*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.nr = 2;
5738*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm2.log2_kr = 2;
5739*4bdc9457SAndroid Build Coastguard Worker     }
5740*4bdc9457SAndroid Build Coastguard Worker 
5741*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
5742*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma;
5743*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma;
5744*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5745*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].channel_tile = 8;
5746*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[0].primary_tile = 3;
5747*4bdc9457SAndroid Build Coastguard Worker 
5748*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma;
5749*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma;
5750*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5751*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].channel_tile = 8;
5752*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[1].primary_tile = 4;
5753*4bdc9457SAndroid Build Coastguard Worker 
5754*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma;
5755*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma;
5756*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5757*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].channel_tile = 8;
5758*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[2].primary_tile = 9;
5759*4bdc9457SAndroid Build Coastguard Worker     #else
5760*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
5761*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86;
5762*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x3__wasmsimd;
5763*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5764*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].channel_tile = 8;
5765*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].primary_tile = 3;
5766*4bdc9457SAndroid Build Coastguard Worker 
5767*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86;
5768*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__wasmsimd;
5769*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5770*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].channel_tile = 8;
5771*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].primary_tile = 4;
5772*4bdc9457SAndroid Build Coastguard Worker 
5773*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86;
5774*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__wasmsimd;
5775*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5776*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].channel_tile = 8;
5777*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].primary_tile = 9;
5778*4bdc9457SAndroid Build Coastguard Worker       } else {
5779*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up4x3__wasmsimd_arm;
5780*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up4x3__wasmsimd;
5781*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5782*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].channel_tile = 4;
5783*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[0].primary_tile = 3;
5784*4bdc9457SAndroid Build Coastguard Worker 
5785*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up4x4__wasmsimd_arm;
5786*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__wasmsimd;
5787*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5788*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].channel_tile = 4;
5789*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[1].primary_tile = 4;
5790*4bdc9457SAndroid Build Coastguard Worker 
5791*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm;
5792*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__wasmsimd;
5793*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5794*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].channel_tile = 4;
5795*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv[2].primary_tile = 9;
5796*4bdc9457SAndroid Build Coastguard Worker       }
5797*4bdc9457SAndroid Build Coastguard Worker     #endif
5798*4bdc9457SAndroid Build Coastguard Worker 
5799*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
5800*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up8x25__wasmrelaxedsimd_fma;
5801*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__wasmrelaxedsimd_fma;
5802*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5803*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 8;
5804*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
5805*4bdc9457SAndroid Build Coastguard Worker     #else
5806*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm;
5807*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__wasmsimd;
5808*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_wasmsimd_params;
5809*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].channel_tile = 4;
5810*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv[3].primary_tile = 25;
5811*4bdc9457SAndroid Build Coastguard Worker     #endif
5812*4bdc9457SAndroid Build Coastguard Worker 
5813*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5814*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.avgpool = (struct avgpool_parameters) {
5815*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4,
5816*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4,
5817*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
5818*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
5819*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
5820*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5821*4bdc9457SAndroid Build Coastguard Worker       };
5822*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
5823*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4,
5824*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4,
5825*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
5826*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
5827*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
5828*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5829*4bdc9457SAndroid Build Coastguard Worker       };
5830*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
5831*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4,
5832*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4,
5833*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
5834*4bdc9457SAndroid Build Coastguard Worker         .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
5835*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
5836*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5837*4bdc9457SAndroid Build Coastguard Worker       };
5838*4bdc9457SAndroid Build Coastguard Worker     } else {
5839*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.avgpool = (struct avgpool_parameters) {
5840*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_arm_c4,
5841*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4,
5842*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
5843*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
5844*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
5845*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5846*4bdc9457SAndroid Build Coastguard Worker       };
5847*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
5848*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_arm_c4,
5849*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4,
5850*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
5851*4bdc9457SAndroid Build Coastguard Worker         .primary_tile = 9,
5852*4bdc9457SAndroid Build Coastguard Worker         .incremental_tile = 8,
5853*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5854*4bdc9457SAndroid Build Coastguard Worker       };
5855*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
5856*4bdc9457SAndroid Build Coastguard Worker         .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4,
5857*4bdc9457SAndroid Build Coastguard Worker         .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4,
5858*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
5859*4bdc9457SAndroid Build Coastguard Worker         .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
5860*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 7,
5861*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
5862*4bdc9457SAndroid Build Coastguard Worker       };
5863*4bdc9457SAndroid Build Coastguard Worker     }
5864*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5865*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.maxpool = (struct maxpool_parameters) {
5866*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4,
5867*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
5868*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
5869*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
5870*4bdc9457SAndroid Build Coastguard Worker       };
5871*4bdc9457SAndroid Build Coastguard Worker     } else {
5872*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.maxpool = (struct maxpool_parameters) {
5873*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4,
5874*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
5875*4bdc9457SAndroid Build Coastguard Worker         .mr = 9,
5876*4bdc9457SAndroid Build Coastguard Worker         .qr = 8,
5877*4bdc9457SAndroid Build Coastguard Worker       };
5878*4bdc9457SAndroid Build Coastguard Worker     }
5879*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
5880*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4,
5881*4bdc9457SAndroid Build Coastguard Worker       .mr = 4,
5882*4bdc9457SAndroid Build Coastguard Worker     };
5883*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
5884*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4,
5885*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
5886*4bdc9457SAndroid Build Coastguard Worker     };
5887*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
5888*4bdc9457SAndroid Build Coastguard Worker       .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4,
5889*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
5890*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
5891*4bdc9457SAndroid Build Coastguard Worker     };
5892*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
5893*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__wasmsimd_c8,
5894*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
5895*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 8,
5896*4bdc9457SAndroid Build Coastguard Worker     };
5897*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.abs = (struct vunary_parameters) {  // f32 abs entry for the WAsm SIMD micro-kernel below
5898*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__wasmsimd_x8,
5899*4bdc9457SAndroid Build Coastguard Worker       .init.f32_abs = xnn_init_f32_abs_wasmsimd_params,
5900*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,  // kept in step with the _x8 suffix of the ukernel, like the other vunary entries here
5901*4bdc9457SAndroid Build Coastguard Worker     };
5902*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5903*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
5904*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__wasmsimd_x86_x8,
5905*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
5906*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5907*4bdc9457SAndroid Build Coastguard Worker       };
5908*4bdc9457SAndroid Build Coastguard Worker     } else {
5909*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.clamp = (struct vunary_parameters) {
5910*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__wasmsimd_arm_x8,
5911*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
5912*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5913*4bdc9457SAndroid Build Coastguard Worker       };
5914*4bdc9457SAndroid Build Coastguard Worker     }
5915*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5916*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
5917*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
5918*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_wasmsimd_rr2_p6_params,
5919*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 20,
5920*4bdc9457SAndroid Build Coastguard Worker       };
5921*4bdc9457SAndroid Build Coastguard Worker     } else {
5922*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
5923*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20,
5924*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_wasmsimd_rr2_p6_params,
5925*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 20,
5926*4bdc9457SAndroid Build Coastguard Worker       };
5927*4bdc9457SAndroid Build Coastguard Worker     }
5928*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.hswish = (struct vunary_parameters) {
5929*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__wasmsimd_x16,
5930*4bdc9457SAndroid Build Coastguard Worker       .init.f32_hswish = xnn_init_f32_hswish_wasmsimd_params,
5931*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
5932*4bdc9457SAndroid Build Coastguard Worker     };
5933*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5934*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
5935*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8,
5936*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params,
5937*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5938*4bdc9457SAndroid Build Coastguard Worker       };
5939*4bdc9457SAndroid Build Coastguard Worker     } else {
5940*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.lrelu = (struct vunary_parameters) {
5941*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8,
5942*4bdc9457SAndroid Build Coastguard Worker         .init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params,
5943*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
5944*4bdc9457SAndroid Build Coastguard Worker       };
5945*4bdc9457SAndroid Build Coastguard Worker     }
5946*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.neg = (struct vunary_parameters) {  // f32 negation entry for the WAsm SIMD micro-kernel below
5947*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__wasmsimd_x8,
5948*4bdc9457SAndroid Build Coastguard Worker       .init.f32_neg = xnn_init_f32_neg_wasmsimd_params,
5949*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,  // kept in step with the _x8 suffix of the ukernel, like the other vunary entries here
5950*4bdc9457SAndroid Build Coastguard Worker     };
5951*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.relu = (struct vunary_parameters) {
5952*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrelu_ukernel__wasmsimd_x16,
5953*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
5954*4bdc9457SAndroid Build Coastguard Worker     };
5955*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndne = (struct vunary_parameters) {
5956*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__wasmsimd_x8,
5957*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5958*4bdc9457SAndroid Build Coastguard Worker     };
5959*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndz = (struct vunary_parameters) {
5960*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__wasmsimd_x8,
5961*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5962*4bdc9457SAndroid Build Coastguard Worker     };
5963*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndu = (struct vunary_parameters) {
5964*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__wasmsimd_x8,
5965*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5966*4bdc9457SAndroid Build Coastguard Worker     };
5967*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndd = (struct vunary_parameters) {
5968*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__wasmsimd_x8,
5969*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5970*4bdc9457SAndroid Build Coastguard Worker     };
5971*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sigmoid = (struct vunary_parameters) {
5972*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
5973*4bdc9457SAndroid Build Coastguard Worker       .init.f32_sigmoid = xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params,
5974*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
5975*4bdc9457SAndroid Build Coastguard Worker     };
5976*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqr = (struct vunary_parameters) {  // f32 square entry for the WAsm SIMD micro-kernel below
5977*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__wasmsimd_x8,
5978*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,  // kept in step with the _x8 suffix of the ukernel, like the other vunary entries here
5979*4bdc9457SAndroid Build Coastguard Worker     };
5980*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqrt = (struct vunary_parameters) {
5981*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8,
5982*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
5983*4bdc9457SAndroid Build Coastguard Worker     };
5984*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
5985*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
5986*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8,
5987*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
5988*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
5989*4bdc9457SAndroid Build Coastguard Worker       };
5990*4bdc9457SAndroid Build Coastguard Worker     } else {
5991*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
5992*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8,
5993*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
5994*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 8,
5995*4bdc9457SAndroid Build Coastguard Worker       };
5996*4bdc9457SAndroid Build Coastguard Worker     }
5997*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
5998*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x16_acc2,
5999*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_expminus_wasmsimd_rr2_p5_params,
6000*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
6001*4bdc9457SAndroid Build Coastguard Worker     };
6002*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6003*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__wasmsimd_x86;
6004*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
6005*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__wasmsimd_x86_x16,
6006*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x16,
6007*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x16,
6008*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__wasmsimd_x16,
6009*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasmsimd_x16,
6010*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasmsimd_x16,
6011*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6012*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6013*4bdc9457SAndroid Build Coastguard Worker       };
6014*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
6015*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__wasmsimd_x86_x16,
6016*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x16,
6017*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x16,
6018*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__wasmsimd_x16,
6019*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__wasmsimd_x16,
6020*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__wasmsimd_x16,
6021*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6022*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6023*4bdc9457SAndroid Build Coastguard Worker       };
6024*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
6025*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasmsimd_x86_x16,
6026*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasmsimd_x86_x16,
6027*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasmsimd_x86_x16,
6028*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6029*4bdc9457SAndroid Build Coastguard Worker       };
6030*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
6031*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__wasmsimd_x86_x16,
6032*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasmsimd_x86_x16,
6033*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasmsimd_x86_x16,
6034*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6035*4bdc9457SAndroid Build Coastguard Worker       };
6036*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
6037*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__wasmsimd_x86_x16,
6038*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x16,
6039*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x16,
6040*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__wasmsimd_x16,
6041*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasmsimd_x16,
6042*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasmsimd_x16,
6043*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6044*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6045*4bdc9457SAndroid Build Coastguard Worker       };
6046*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
6047*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__wasmsimd_x86_x16,
6048*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x16,
6049*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x16,
6050*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__wasmsimd_x16,
6051*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__wasmsimd_x16,
6052*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__wasmsimd_x16,
6053*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6054*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6055*4bdc9457SAndroid Build Coastguard Worker       };
6056*4bdc9457SAndroid Build Coastguard Worker     } else {
6057*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__wasmsimd_arm;
6058*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vadd = (struct vbinary_parameters) {
6059*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__wasmsimd_arm_x16,
6060*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x16,
6061*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x16,
6062*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_ukernel__wasmsimd_x16,
6063*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasmsimd_x16,
6064*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasmsimd_x16,
6065*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6066*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6067*4bdc9457SAndroid Build Coastguard Worker       };
6068*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vdiv = (struct vbinary_parameters) {
6069*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__wasmsimd_arm_x16,
6070*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x16,
6071*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x16,
6072*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__wasmsimd_x16,
6073*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__wasmsimd_x16,
6074*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__wasmsimd_x16,
6075*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6076*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6077*4bdc9457SAndroid Build Coastguard Worker       };
6078*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmax = (struct vbinary_parameters) {
6079*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasmsimd_arm_x16,
6080*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasmsimd_arm_x16,
6081*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasmsimd_arm_x16,
6082*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6083*4bdc9457SAndroid Build Coastguard Worker       };
6084*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmin = (struct vbinary_parameters) {
6085*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__wasmsimd_arm_x16,
6086*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasmsimd_arm_x16,
6087*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasmsimd_arm_x16,
6088*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6089*4bdc9457SAndroid Build Coastguard Worker       };
6090*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmul = (struct vbinary_parameters) {
6091*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__wasmsimd_arm_x16,
6092*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x16,
6093*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x16,
6094*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_ukernel__wasmsimd_x16,
6095*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasmsimd_x16,
6096*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__wasmsimd_x16,
6097*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6098*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6099*4bdc9457SAndroid Build Coastguard Worker       };
6100*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vsub = (struct vbinary_parameters) {
6101*4bdc9457SAndroid Build Coastguard Worker         .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__wasmsimd_arm_x16,
6102*4bdc9457SAndroid Build Coastguard Worker         .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x16,
6103*4bdc9457SAndroid Build Coastguard Worker         .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x16,
6104*4bdc9457SAndroid Build Coastguard Worker         .linear.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__wasmsimd_x16,
6105*4bdc9457SAndroid Build Coastguard Worker         .linear.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__wasmsimd_x16,
6106*4bdc9457SAndroid Build Coastguard Worker         .linear.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__wasmsimd_x16,
6107*4bdc9457SAndroid Build Coastguard Worker         .init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params,
6108*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6109*4bdc9457SAndroid Build Coastguard Worker       };
6110*4bdc9457SAndroid Build Coastguard Worker     }
6111*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
6112*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__wasmsimd_x16,
6113*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__wasmsimd_x16,
6114*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__wasmsimd_x16,
6115*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
6116*4bdc9457SAndroid Build Coastguard Worker     };
6117*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6118*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
6119*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x,
6120*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
6121*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
6122*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
6123*4bdc9457SAndroid Build Coastguard Worker       };
6124*4bdc9457SAndroid Build Coastguard Worker     } else {
6125*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
6126*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x,
6127*4bdc9457SAndroid Build Coastguard Worker         .init.f32 = xnn_init_f32_minmax_wasmsimd_params,
6128*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
6129*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
6130*4bdc9457SAndroid Build Coastguard Worker       };
6131*4bdc9457SAndroid Build Coastguard Worker     }
6132*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_NCHW_OPERATORS
6133*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_CHW_OPT;
6134*4bdc9457SAndroid Build Coastguard Worker 
6135*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
6136*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm = (struct spmm_parameters) {
6137*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86,
6138*4bdc9457SAndroid Build Coastguard Worker           .mr = 32,
6139*4bdc9457SAndroid Build Coastguard Worker           .nr = 1,
6140*4bdc9457SAndroid Build Coastguard Worker         };
6141*4bdc9457SAndroid Build Coastguard Worker       } else {
6142*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.spmm = (struct spmm_parameters) {
6143*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm,
6144*4bdc9457SAndroid Build Coastguard Worker           .mr = 32,
6145*4bdc9457SAndroid Build Coastguard Worker           .nr = 1,
6146*4bdc9457SAndroid Build Coastguard Worker         };
6147*4bdc9457SAndroid Build Coastguard Worker       }
6148*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
6149*4bdc9457SAndroid Build Coastguard Worker         .ukernel_with_symm_padding =
6150*4bdc9457SAndroid Build Coastguard Worker           (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
6151*4bdc9457SAndroid Build Coastguard Worker         .output_channel_tile = 4,
6152*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
6153*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 2,
6154*4bdc9457SAndroid Build Coastguard Worker       };
6155*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
6156*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
6157*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_2x4,
6158*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6159*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
6160*4bdc9457SAndroid Build Coastguard Worker         };
6161*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
6162*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2,
6163*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6164*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
6165*4bdc9457SAndroid Build Coastguard Worker         };
6166*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
6167*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4,
6168*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6169*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 3,
6170*4bdc9457SAndroid Build Coastguard Worker         };
6171*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
6172*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc2,
6173*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6174*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
6175*4bdc9457SAndroid Build Coastguard Worker         };
6176*4bdc9457SAndroid Build Coastguard Worker       } else {
6177*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
6178*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_2x4,
6179*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6180*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 2,
6181*4bdc9457SAndroid Build Coastguard Worker         };
6182*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
6183*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4,
6184*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6185*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
6186*4bdc9457SAndroid Build Coastguard Worker         };
6187*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
6188*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4,
6189*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6190*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 3,
6191*4bdc9457SAndroid Build Coastguard Worker         };
6192*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
6193*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc2,
6194*4bdc9457SAndroid Build Coastguard Worker           .output_width_tile = 4,
6195*4bdc9457SAndroid Build Coastguard Worker           .output_height_tile = 1,
6196*4bdc9457SAndroid Build Coastguard Worker         };
6197*4bdc9457SAndroid Build Coastguard Worker       }
6198*4bdc9457SAndroid Build Coastguard Worker       if (is_wasm_x86) {
6199*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
6200*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__wasmsimd_x86_x4,
6201*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 4,
6202*4bdc9457SAndroid Build Coastguard Worker         };
6203*4bdc9457SAndroid Build Coastguard Worker       } else {
6204*4bdc9457SAndroid Build Coastguard Worker         xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
6205*4bdc9457SAndroid Build Coastguard Worker           .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__wasmsimd_arm_x4,
6206*4bdc9457SAndroid Build Coastguard Worker           .channel_tile = 4,
6207*4bdc9457SAndroid Build Coastguard Worker         };
6208*4bdc9457SAndroid Build Coastguard Worker       }
6209*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
6210*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8,
6211*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
6212*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 8,
6213*4bdc9457SAndroid Build Coastguard Worker       };
6214*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_NCHW_OPERATORS
6215*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F32_OPERATORS
6216*4bdc9457SAndroid Build Coastguard Worker 
6217*4bdc9457SAndroid Build Coastguard Worker   /*************************** VCVT WAsm SIMD micro-kernels***************************/
6218*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_VCVT_OPERATORS
6219*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_VCVT;
6220*4bdc9457SAndroid Build Coastguard Worker 
6221*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
6222*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16,
6223*4bdc9457SAndroid Build Coastguard Worker       .init.f16_f32_cvt = xnn_init_f16_f32_cvt_wasmsimd_int16_params,
6224*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 16,
6225*4bdc9457SAndroid Build Coastguard Worker     };
6226*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
6227*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__wasmsimd_x24,
6228*4bdc9457SAndroid Build Coastguard Worker       .init.f32_f16_cvt = xnn_init_f32_f16_cvt_wasmsimd_params,
6229*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 24,
6230*4bdc9457SAndroid Build Coastguard Worker     };
6231*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
6232*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x32,
6233*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_wasmsimd_magic_params,
6234*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
6235*4bdc9457SAndroid Build Coastguard Worker     };
6236*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
6237*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x32,
6238*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_wasmsimd_magic_params,
6239*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
6240*4bdc9457SAndroid Build Coastguard Worker     };
6241*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
6242*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
6243*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__wasmrelaxedsimd_x32,
6244*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_wasmsimd_params,
6245*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
6246*4bdc9457SAndroid Build Coastguard Worker       };
6247*4bdc9457SAndroid Build Coastguard Worker     #else
6248*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
6249*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__wasmsimd_x16,
6250*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_wasmsimd_params,
6251*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6252*4bdc9457SAndroid Build Coastguard Worker       };
6253*4bdc9457SAndroid Build Coastguard Worker     #endif
6254*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
6255*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32,
6256*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_wasmsimd_params,
6257*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
6258*4bdc9457SAndroid Build Coastguard Worker     };
6259*4bdc9457SAndroid Build Coastguard Worker     #if XNN_ARCH_WASMRELAXEDSIMD
6260*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
6261*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__wasmrelaxedsimd_x32,
6262*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_wasmsimd_params,
6263*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 32,
6264*4bdc9457SAndroid Build Coastguard Worker       };
6265*4bdc9457SAndroid Build Coastguard Worker     #else
6266*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
6267*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__wasmsimd_x16,
6268*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_wasmsimd_params,
6269*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 16,
6270*4bdc9457SAndroid Build Coastguard Worker       };
6271*4bdc9457SAndroid Build Coastguard Worker     #endif
6272*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
6273*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32,
6274*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_wasmsimd_params,
6275*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 32,
6276*4bdc9457SAndroid Build Coastguard Worker     };
6277*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_VCVT_OPERATORS
6278*4bdc9457SAndroid Build Coastguard Worker 
6279*4bdc9457SAndroid Build Coastguard Worker   /**************************** X32 WAsm SIMD micro-kernels****************************/
6280*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X32_OPERATORS
6281*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X32;
6282*4bdc9457SAndroid Build Coastguard Worker 
6283*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__wasmsimd;
6284*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.zip = (struct zip_parameters) {
6285*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__wasmsimd,
6286*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__wasmsimd,
6287*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__wasmsimd,
6288*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__wasmsimd,
6289*4bdc9457SAndroid Build Coastguard Worker     };
6290*4bdc9457SAndroid Build Coastguard Worker 
6291*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.transpose = (struct transpose_parameters) {
6292*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__2x4_scalar_int,
6293*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
6294*4bdc9457SAndroid Build Coastguard Worker     };
6295*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X32_OPERATORS
6296*4bdc9457SAndroid Build Coastguard Worker 
6297*4bdc9457SAndroid Build Coastguard Worker   /**************************** XX WAsm SIMD micro-kernels****************************/
6298*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_XX_OPERATORS
6299*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_XX;
6300*4bdc9457SAndroid Build Coastguard Worker 
6301*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
6302*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.fill = (struct fill_parameters) {
6303*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__wasmsimd_x64,
6304*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
6305*4bdc9457SAndroid Build Coastguard Worker     };
6306*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.pad = (struct pad_parameters) {
6307*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__wasmsimd,
6308*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
6309*4bdc9457SAndroid Build Coastguard Worker     };
6310*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.transpose = (struct transpose_parameters) {
6311*4bdc9457SAndroid Build Coastguard Worker       .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
6312*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
6313*4bdc9457SAndroid Build Coastguard Worker     };
6314*4bdc9457SAndroid Build Coastguard Worker   #endif
6315*4bdc9457SAndroid Build Coastguard Worker 
6316*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_WASM
6317*4bdc9457SAndroid Build Coastguard Worker 
6318*4bdc9457SAndroid Build Coastguard Worker   /**************************** QC8 WAsm micro-kernels****************************/
6319*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QC8_OPERATORS
6320*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QC8;
6321*4bdc9457SAndroid Build Coastguard Worker 
6322*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6323*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6324*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6325*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6326*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6327*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params;
6328*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 2;
6329*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 2;
6330*4bdc9457SAndroid Build Coastguard Worker     } else {
6331*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6332*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6333*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6334*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6335*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
6336*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.mr = 4;
6337*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.gemm.nr = 4;
6338*4bdc9457SAndroid Build Coastguard Worker     }
6339*4bdc9457SAndroid Build Coastguard Worker 
6340*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6341*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_imagic;
6342*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params;
6343*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 2;
6344*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].primary_tile = 3;
6345*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic;
6346*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params;
6347*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 2;
6348*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].primary_tile = 9;
6349*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic;
6350*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params;
6351*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 1;
6352*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].primary_tile = 25;
6353*4bdc9457SAndroid Build Coastguard Worker     } else {
6354*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__wasm_fmagic;
6355*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
6356*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].channel_tile = 2;
6357*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[0].primary_tile = 3;
6358*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic;
6359*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
6360*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].channel_tile = 2;
6361*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[1].primary_tile = 9;
6362*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic;
6363*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params;
6364*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].channel_tile = 2;
6365*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qc8.dwconv[2].primary_tile = 25;
6366*4bdc9457SAndroid Build Coastguard Worker     }
6367*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QC8_OPERATORS
6368*4bdc9457SAndroid Build Coastguard Worker 
6369*4bdc9457SAndroid Build Coastguard Worker   /**************************** QS8 WAsm micro-kernels****************************/
6370*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
6371*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QS8;
6372*4bdc9457SAndroid Build Coastguard Worker 
6373*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6374*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6375*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6376*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6377*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6378*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params;
6379*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 2;
6380*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 2;
6381*4bdc9457SAndroid Build Coastguard Worker     } else {
6382*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6383*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6384*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6385*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6386*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params;
6387*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.mr = 4;
6388*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.gemm.nr = 4;
6389*4bdc9457SAndroid Build Coastguard Worker     }
6390*4bdc9457SAndroid Build Coastguard Worker 
6391*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6392*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic;
6393*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params;
6394*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 2;
6395*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].primary_tile = 9;
6396*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic;
6397*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params;
6398*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 1;
6399*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].primary_tile = 25;
6400*4bdc9457SAndroid Build Coastguard Worker     } else {
6401*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic;
6402*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params;
6403*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].channel_tile = 2;
6404*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[0].primary_tile = 9;
6405*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic;
6406*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params;
6407*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].channel_tile = 2;
6408*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.dwconv[1].primary_tile = 25;
6409*4bdc9457SAndroid Build Coastguard Worker     }
6410*4bdc9457SAndroid Build Coastguard Worker 
6411*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
6412*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c4,
6413*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c4,
6414*4bdc9457SAndroid Build Coastguard Worker       .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_scalar_imagic_params,
6415*4bdc9457SAndroid Build Coastguard Worker       .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_scalar_imagic_params,
6416*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
6417*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
6418*4bdc9457SAndroid Build Coastguard Worker     };
6419*4bdc9457SAndroid Build Coastguard Worker 
6420*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vadd = (struct vbinary_parameters) {
6421*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__scalar_x4,
6422*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x4,
6423*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x4,
6424*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_add = xnn_init_qs8_add_minmax_scalar_params,
6425*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6426*4bdc9457SAndroid Build Coastguard Worker     };
6427*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vmul = (struct vbinary_parameters) {
6428*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
6429*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
6430*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
6431*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params,
6432*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6433*4bdc9457SAndroid Build Coastguard Worker     };
6434*4bdc9457SAndroid Build Coastguard Worker 
6435*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6436*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
6437*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__scalar_select_x4,
6438*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_select_params,
6439*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6440*4bdc9457SAndroid Build Coastguard Worker       };
6441*4bdc9457SAndroid Build Coastguard Worker     } else {
6442*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qs8.lrelu = (struct vunary_parameters) {
6443*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__scalar_andxor_x4,
6444*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_andxor_params,
6445*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6446*4bdc9457SAndroid Build Coastguard Worker       };
6447*4bdc9457SAndroid Build Coastguard Worker     }
6448*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS
6449*4bdc9457SAndroid Build Coastguard Worker 
6450*4bdc9457SAndroid Build Coastguard Worker   /**************************** QU8 WAsm micro-kernels****************************/
6451*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QU8_OPERATORS
6452*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QU8;
6453*4bdc9457SAndroid Build Coastguard Worker 
6454*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6455*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6456*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic);
6457*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6458*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic);
6459*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params;
6460*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 2;
6461*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 2;
6462*4bdc9457SAndroid Build Coastguard Worker     } else {
6463*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6464*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic);
6465*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6466*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic);
6467*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params;
6468*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.mr = 4;
6469*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.gemm.nr = 4;
6470*4bdc9457SAndroid Build Coastguard Worker     }
6471*4bdc9457SAndroid Build Coastguard Worker 
6472*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6473*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic;
6474*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params;
6475*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 2;
6476*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].primary_tile = 9;
6477*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic;
6478*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params;
6479*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 1;
6480*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].primary_tile = 25;
6481*4bdc9457SAndroid Build Coastguard Worker     } else {
6482*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic;
6483*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params;
6484*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].channel_tile = 2;
6485*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[0].primary_tile = 9;
6486*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic;
6487*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params;
6488*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].channel_tile = 2;
6489*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[1].primary_tile = 25;
6490*4bdc9457SAndroid Build Coastguard Worker     }
6491*4bdc9457SAndroid Build Coastguard Worker 
6492*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.avgpool = (struct avgpool_parameters) {
6493*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1,
6494*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1,
6495*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_scalar_params,
6496*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
6497*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
6498*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6499*4bdc9457SAndroid Build Coastguard Worker     };
6500*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
6501*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c4,
6502*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c4,
6503*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_scalar_imagic_params,
6504*4bdc9457SAndroid Build Coastguard Worker       .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_scalar_imagic_params,
6505*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
6506*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
6507*4bdc9457SAndroid Build Coastguard Worker     };
6508*4bdc9457SAndroid Build Coastguard Worker 
6509*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vadd = (struct vbinary_parameters) {
6510*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__scalar_x4,
6511*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
6512*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
6513*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_add = xnn_init_qu8_add_minmax_scalar_params,
6514*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6515*4bdc9457SAndroid Build Coastguard Worker     };
6516*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vmul = (struct vbinary_parameters) {
6517*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4,
6518*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
6519*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
6520*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params,
6521*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6522*4bdc9457SAndroid Build Coastguard Worker     };
6523*4bdc9457SAndroid Build Coastguard Worker 
6524*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6525*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
6526*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__scalar_select_x4,
6527*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_select_params,
6528*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6529*4bdc9457SAndroid Build Coastguard Worker       };
6530*4bdc9457SAndroid Build Coastguard Worker     } else {
6531*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.lrelu = (struct vunary_parameters) {
6532*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__scalar_andxor_x4,
6533*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_andxor_params,
6534*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6535*4bdc9457SAndroid Build Coastguard Worker       };
6536*4bdc9457SAndroid Build Coastguard Worker     }
6537*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QU8_OPERATORS
6538*4bdc9457SAndroid Build Coastguard Worker 
6539*4bdc9457SAndroid Build Coastguard Worker   /**************************** S8 WAsm micro-kernels****************************/
6540*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_S8_OPERATORS
6541*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_S8;
6542*4bdc9457SAndroid Build Coastguard Worker 
6543*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.clamp = (struct vunary_parameters) {
6544*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__scalar_x4,
6545*4bdc9457SAndroid Build Coastguard Worker       .init.s8_minmax = xnn_init_s8_minmax_scalar_params,
6546*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6547*4bdc9457SAndroid Build Coastguard Worker     };
6548*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
6549*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__scalar_c1,
6550*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
6551*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6552*4bdc9457SAndroid Build Coastguard Worker     };
6553*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.maxpool = (struct maxpool_parameters) {
6554*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__scalar_c1,
6555*4bdc9457SAndroid Build Coastguard Worker       .init.s8 = xnn_init_s8_minmax_scalar_params,
6556*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
6557*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
6558*4bdc9457SAndroid Build Coastguard Worker     };
6559*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_S8_OPERATORS
6560*4bdc9457SAndroid Build Coastguard Worker 
6561*4bdc9457SAndroid Build Coastguard Worker   /**************************** U8 WAsm micro-kernels****************************/
6562*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_U8_OPERATORS
6563*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_U8;
6564*4bdc9457SAndroid Build Coastguard Worker 
6565*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.clamp = (struct vunary_parameters) {
6566*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__scalar_x4,
6567*4bdc9457SAndroid Build Coastguard Worker       .init.u8_minmax = xnn_init_u8_minmax_scalar_params,
6568*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6569*4bdc9457SAndroid Build Coastguard Worker     };
6570*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
6571*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__scalar_c1,
6572*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
6573*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6574*4bdc9457SAndroid Build Coastguard Worker     };
6575*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.maxpool = (struct maxpool_parameters) {
6576*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1,
6577*4bdc9457SAndroid Build Coastguard Worker       .init.u8 = xnn_init_u8_minmax_scalar_params,
6578*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
6579*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
6580*4bdc9457SAndroid Build Coastguard Worker     };
6581*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
6582*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
6583*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_U8_OPERATORS
6584*4bdc9457SAndroid Build Coastguard Worker 
6585*4bdc9457SAndroid Build Coastguard Worker   /**************************** X8 WAsm micro-kernels****************************/
6586*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X8_OPERATORS
6587*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X8;
6588*4bdc9457SAndroid Build Coastguard Worker 
6589*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;
6590*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.zip = (struct zip_parameters) {
6591*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
6592*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
6593*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
6594*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
6595*4bdc9457SAndroid Build Coastguard Worker     };
6596*4bdc9457SAndroid Build Coastguard Worker 
6597*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.transpose = (struct transpose_parameters) {
6598*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__2x4_scalar_int,
6599*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
6600*4bdc9457SAndroid Build Coastguard Worker     };
6601*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X8_OPERATORS
6602*4bdc9457SAndroid Build Coastguard Worker 
6603*4bdc9457SAndroid Build Coastguard Worker   /**************************** X16 WAsm micro-kernels****************************/
6604*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X16_OPERATORS
6605*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X16;
6606*4bdc9457SAndroid Build Coastguard Worker 
6607*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x16.transpose = (struct transpose_parameters) {
6608*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__2x4_scalar_int,
6609*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
6610*4bdc9457SAndroid Build Coastguard Worker     };
6611*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X16_OPERATORS
6612*4bdc9457SAndroid Build Coastguard Worker 
6613*4bdc9457SAndroid Build Coastguard Worker   /**************************** F32 WAsm micro-kernels****************************/
6614*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F32_OPERATORS
6615*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_F32;
6616*4bdc9457SAndroid Build Coastguard Worker 
6617*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6618*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_2x4__scalar);
6619*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_2x4__scalar);
6620*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x4__wasm);
6621*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x4__wasm);
6622*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_2x4__scalar);
6623*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_2x4__scalar);
6624*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x4__wasm);
6625*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x4__wasm);
6626*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar);
6627*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(2)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar);
6628*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar);
6629*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar);
6630*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
6631*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 2;
6632*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 4;
6633*4bdc9457SAndroid Build Coastguard Worker     } else {
6634*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x4__wasm);
6635*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x4__wasm);
6636*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x4__wasm);
6637*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x4__wasm);
6638*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_4x4__wasm);
6639*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_4x4__wasm);
6640*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x4__wasm);
6641*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x4__wasm);
6642*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar);
6643*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar);
6644*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar);
6645*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar);
6646*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
6647*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.mr = 4;
6648*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gemm.nr = 4;
6649*4bdc9457SAndroid Build Coastguard Worker     }
6650*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__wasm);
6651*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__wasm);
6652*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__scalar);
6653*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar);
6654*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
6655*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.mr = 4;
6656*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.nr = 2;
6657*4bdc9457SAndroid Build Coastguard Worker 
6658*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x3__wasm_acc2;
6659*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x3__scalar_acc2;
6660*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
6661*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].channel_tile = 1;
6662*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].primary_tile = 3;
6663*4bdc9457SAndroid Build Coastguard Worker 
6664*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x4__wasm_acc2;
6665*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2;
6666*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
6667*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].channel_tile = 1;
6668*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].primary_tile = 4;
6669*4bdc9457SAndroid Build Coastguard Worker 
6670*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x9__wasm_acc2;
6671*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2;
6672*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
6673*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].channel_tile = 1;
6674*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].primary_tile = 9;
6675*4bdc9457SAndroid Build Coastguard Worker 
6676*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x25__wasm_acc2;
6677*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2;
6678*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
6679*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].channel_tile = 1;
6680*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].primary_tile = 25;
6681*4bdc9457SAndroid Build Coastguard Worker 
6682*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.avgpool = (struct avgpool_parameters) {
6683*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__wasm_c1,
6684*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1,
6685*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
6686*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
6687*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
6688*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6689*4bdc9457SAndroid Build Coastguard Worker     };
6690*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
6691*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__wasm_c1,
6692*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1,
6693*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
6694*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
6695*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
6696*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6697*4bdc9457SAndroid Build Coastguard Worker     };
6698*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
6699*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__wasm_c1,
6700*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1,
6701*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
6702*4bdc9457SAndroid Build Coastguard Worker       .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
6703*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
6704*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6705*4bdc9457SAndroid Build Coastguard Worker     };
6706*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.maxpool = (struct maxpool_parameters) {
6707*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__wasm_c1,
6708*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
6709*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
6710*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
6711*4bdc9457SAndroid Build Coastguard Worker     };
6712*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
6713*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
6714*4bdc9457SAndroid Build Coastguard Worker       .mr = 4,
6715*4bdc9457SAndroid Build Coastguard Worker     };
6716*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
6717*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
6718*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
6719*4bdc9457SAndroid Build Coastguard Worker     };
6720*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
6721*4bdc9457SAndroid Build Coastguard Worker       .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
6722*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
6723*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
6724*4bdc9457SAndroid Build Coastguard Worker     };
6725*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
6726*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__scalar_c2,
6727*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
6728*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 2,
6729*4bdc9457SAndroid Build Coastguard Worker     };
6730*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.abs = (struct vunary_parameters) {
6731*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__scalar_x4,
6732*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6733*4bdc9457SAndroid Build Coastguard Worker     };
6734*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.clamp = (struct vunary_parameters) {
6735*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__wasm_x4,
6736*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
6737*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6738*4bdc9457SAndroid Build Coastguard Worker     };
6739*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6740*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
6741*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__scalar_x4,
6742*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
6743*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6744*4bdc9457SAndroid Build Coastguard Worker       };
6745*4bdc9457SAndroid Build Coastguard Worker     } else {
6746*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.hswish = (struct vunary_parameters) {
6747*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__wasm_x4,
6748*4bdc9457SAndroid Build Coastguard Worker         .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
6749*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6750*4bdc9457SAndroid Build Coastguard Worker       };
6751*4bdc9457SAndroid Build Coastguard Worker     }
6752*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6753*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
6754*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
6755*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params,
6756*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 2,
6757*4bdc9457SAndroid Build Coastguard Worker       };
6758*4bdc9457SAndroid Build Coastguard Worker     } else {
6759*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.elu = (struct vunary_parameters) {
6760*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
6761*4bdc9457SAndroid Build Coastguard Worker         .init.f32_elu = xnn_init_f32_elu_scalar_rr2_p6_params,
6762*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 6,
6763*4bdc9457SAndroid Build Coastguard Worker       };
6764*4bdc9457SAndroid Build Coastguard Worker     }
6765*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.lrelu = (struct vunary_parameters) {
6766*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__scalar_x4,
6767*4bdc9457SAndroid Build Coastguard Worker       .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,
6768*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6769*4bdc9457SAndroid Build Coastguard Worker     };
6770*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.neg = (struct vunary_parameters) {
6771*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__scalar_x4,
6772*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6773*4bdc9457SAndroid Build Coastguard Worker     };
6774*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6775*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.relu = (struct vunary_parameters) {
6776*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrelu_ukernel__scalar_x8,
6777*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
6778*4bdc9457SAndroid Build Coastguard Worker       };
6779*4bdc9457SAndroid Build Coastguard Worker     } else {
6780*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.relu = (struct vunary_parameters) {
6781*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrelu_ukernel__wasm_x8,
6782*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 8,
6783*4bdc9457SAndroid Build Coastguard Worker       };
6784*4bdc9457SAndroid Build Coastguard Worker     }
6785*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndne = (struct vunary_parameters) {
6786*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__scalar_libm_x4,
6787*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6788*4bdc9457SAndroid Build Coastguard Worker     };
6789*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndz = (struct vunary_parameters) {
6790*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__scalar_libm_x4,
6791*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6792*4bdc9457SAndroid Build Coastguard Worker     };
6793*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndu = (struct vunary_parameters) {
6794*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__scalar_libm_x4,
6795*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6796*4bdc9457SAndroid Build Coastguard Worker     };
6797*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndd = (struct vunary_parameters) {
6798*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__scalar_libm_x4,
6799*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6800*4bdc9457SAndroid Build Coastguard Worker     };
6801*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sigmoid = (struct vunary_parameters) {
6802*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
6803*4bdc9457SAndroid Build Coastguard Worker       .init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
6804*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 2,
6805*4bdc9457SAndroid Build Coastguard Worker     };
6806*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqr = (struct vunary_parameters) {
6807*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__scalar_x4,
6808*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6809*4bdc9457SAndroid Build Coastguard Worker     };
6810*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqrt = (struct vunary_parameters) {
6811*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1,
6812*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
6813*4bdc9457SAndroid Build Coastguard Worker     };
6814*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6815*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
6816*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
6817*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
6818*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
6819*4bdc9457SAndroid Build Coastguard Worker       };
6820*4bdc9457SAndroid Build Coastguard Worker     } else {
6821*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.prelu = (struct prelu_parameters) {
6822*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__wasm_2x4,
6823*4bdc9457SAndroid Build Coastguard Worker         .row_tile = 2,
6824*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 4,
6825*4bdc9457SAndroid Build Coastguard Worker       };
6826*4bdc9457SAndroid Build Coastguard Worker     }
6827*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
6828*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4_acc2,
6829*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_expminus_scalar_rr2_p5_params,
6830*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6831*4bdc9457SAndroid Build Coastguard Worker     };
6832*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__scalar;
6833*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vadd = (struct vbinary_parameters) {
6834*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__wasm_x8,
6835*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasm_x8,
6836*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__wasm_x8,
6837*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
6838*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6839*4bdc9457SAndroid Build Coastguard Worker     };
6840*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vdiv = (struct vbinary_parameters) {
6841*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__wasm_x8,
6842*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__wasm_x8,
6843*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__wasm_x8,
6844*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
6845*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6846*4bdc9457SAndroid Build Coastguard Worker     };
6847*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmax = (struct vbinary_parameters) {
6848*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasm_x8,
6849*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x8,
6850*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x8,
6851*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6852*4bdc9457SAndroid Build Coastguard Worker     };
6853*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmin = (struct vbinary_parameters) {
6854*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__wasm_x8,
6855*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasm_x8,
6856*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__wasm_x8,
6857*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6858*4bdc9457SAndroid Build Coastguard Worker     };
6859*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmul = (struct vbinary_parameters) {
6860*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__wasm_x8,
6861*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasm_x8,
6862*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__wasm_x8,
6863*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
6864*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6865*4bdc9457SAndroid Build Coastguard Worker     };
6866*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsub = (struct vbinary_parameters) {
6867*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__wasm_x8,
6868*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__wasm_x8,
6869*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__wasm_x8,
6870*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
6871*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6872*4bdc9457SAndroid Build Coastguard Worker     };
6873*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
6874*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__scalar_x8,
6875*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
6876*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
6877*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
6878*4bdc9457SAndroid Build Coastguard Worker     };
6879*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
6880*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x,
6881*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
6882*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
6883*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 2,
6884*4bdc9457SAndroid Build Coastguard Worker     };
6885*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_NCHW_OPERATORS
6886*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_CHW_OPT;
6887*4bdc9457SAndroid Build Coastguard Worker 
6888*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm = (struct spmm_parameters) {
6889*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x1__scalar,
6890*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
6891*4bdc9457SAndroid Build Coastguard Worker         .nr = 1,
6892*4bdc9457SAndroid Build Coastguard Worker       };
6893*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm2 = (struct spmm_parameters) {
6894*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x2__scalar,
6895*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
6896*4bdc9457SAndroid Build Coastguard Worker         .nr = 2,
6897*4bdc9457SAndroid Build Coastguard Worker       };
6898*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm4 = (struct spmm_parameters) {
6899*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x4__scalar,
6900*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
6901*4bdc9457SAndroid Build Coastguard Worker         .nr = 4,
6902*4bdc9457SAndroid Build Coastguard Worker       };
6903*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
6904*4bdc9457SAndroid Build Coastguard Worker         .ukernel_with_symm_padding =
6905*4bdc9457SAndroid Build Coastguard Worker           (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
6906*4bdc9457SAndroid Build Coastguard Worker         .output_channel_tile = 4,
6907*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
6908*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
6909*4bdc9457SAndroid Build Coastguard Worker       };
6910*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
6911*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2,
6912*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
6913*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
6914*4bdc9457SAndroid Build Coastguard Worker       };
6915*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
6916*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2,
6917*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
6918*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
6919*4bdc9457SAndroid Build Coastguard Worker       };
6920*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
6921*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5,
6922*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
6923*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
6924*4bdc9457SAndroid Build Coastguard Worker       };
6925*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
6926*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5,
6927*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
6928*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
6929*4bdc9457SAndroid Build Coastguard Worker       };
6930*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
6931*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__scalar_x1,
6932*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
6933*4bdc9457SAndroid Build Coastguard Worker       };
6934*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
6935*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__scalar_p4,
6936*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
6937*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 4,
6938*4bdc9457SAndroid Build Coastguard Worker       };
6939*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_NCHW_OPERATORS
6940*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F32_OPERATORS
6941*4bdc9457SAndroid Build Coastguard Worker 
6942*4bdc9457SAndroid Build Coastguard Worker   /*************************** VCVT WAsm micro-kernels **************************/
6943*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_VCVT_OPERATORS
6944*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_VCVT;
6945*4bdc9457SAndroid Build Coastguard Worker 
6946*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
6947*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__scalar_x1,
6948*4bdc9457SAndroid Build Coastguard Worker       .init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params,
6949*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
6950*4bdc9457SAndroid Build Coastguard Worker     };
6951*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
6952*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4,
6953*4bdc9457SAndroid Build Coastguard Worker       .init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_bitcast_params,
6954*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
6955*4bdc9457SAndroid Build Coastguard Worker     };
6956*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {  // F32->QS8/QU8 conversion: scalar imagic x1 kernels when is_wasm_x86 is set, wasm fmagic x4 kernels otherwise
6957*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
6958*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__scalar_imagic_x1,  // QS8 kernel, paired with the f32_qs8_cvt params below
6959*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_imagic_params,
6960*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
6961*4bdc9457SAndroid Build Coastguard Worker       };
6962*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
6963*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__scalar_imagic_x1,
6964*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_imagic_params,
6965*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
6966*4bdc9457SAndroid Build Coastguard Worker       };
6967*4bdc9457SAndroid Build Coastguard Worker     } else {
6968*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
6969*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__wasm_fmagic_x4,  // QS8 kernel, paired with the f32_qs8_cvt params below
6970*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_fmagic_params,
6971*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6972*4bdc9457SAndroid Build Coastguard Worker       };
6973*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
6974*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__wasm_fmagic_x4,
6975*4bdc9457SAndroid Build Coastguard Worker         .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_fmagic_params,
6976*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6977*4bdc9457SAndroid Build Coastguard Worker       };
6978*4bdc9457SAndroid Build Coastguard Worker     }
6979*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6980*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
6981*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__scalar_x1,
6982*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_scalar_params,
6983*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
6984*4bdc9457SAndroid Build Coastguard Worker       };
6985*4bdc9457SAndroid Build Coastguard Worker     } else {
6986*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qs8 = (struct vunary_parameters) {
6987*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__scalar_x4,
6988*4bdc9457SAndroid Build Coastguard Worker         .init.qs8_cvt = xnn_init_qs8_cvt_scalar_params,
6989*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
6990*4bdc9457SAndroid Build Coastguard Worker       };
6991*4bdc9457SAndroid Build Coastguard Worker     }
6992*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
6993*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__scalar_x1,
6994*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params,
6995*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
6996*4bdc9457SAndroid Build Coastguard Worker     };
6997*4bdc9457SAndroid Build Coastguard Worker     if (is_wasm_x86) {
6998*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
6999*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__scalar_x1,
7000*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_scalar_params,
7001*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 1,
7002*4bdc9457SAndroid Build Coastguard Worker       };
7003*4bdc9457SAndroid Build Coastguard Worker     } else {
7004*4bdc9457SAndroid Build Coastguard Worker       xnn_params.vcvt.qu8 = (struct vunary_parameters) {
7005*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__scalar_x4,
7006*4bdc9457SAndroid Build Coastguard Worker         .init.qu8_cvt = xnn_init_qu8_cvt_scalar_params,
7007*4bdc9457SAndroid Build Coastguard Worker         .element_tile = 4,
7008*4bdc9457SAndroid Build Coastguard Worker       };
7009*4bdc9457SAndroid Build Coastguard Worker     }
7010*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
7011*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__scalar_x1,
7012*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params,
7013*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7014*4bdc9457SAndroid Build Coastguard Worker     };
7015*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_VCVT_OPERATORS
7016*4bdc9457SAndroid Build Coastguard Worker 
7017*4bdc9457SAndroid Build Coastguard Worker   /**************************** X32 WAsm micro-kernels ***************************/
7018*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X32_OPERATORS
7019*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X32;
7020*4bdc9457SAndroid Build Coastguard Worker 
7021*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
7022*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.zip = (struct zip_parameters) {
7023*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
7024*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
7025*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
7026*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
7027*4bdc9457SAndroid Build Coastguard Worker     };
7028*4bdc9457SAndroid Build Coastguard Worker 
7029*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.transpose = (struct transpose_parameters) {
7030*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__2x4_scalar_int,
7031*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7032*4bdc9457SAndroid Build Coastguard Worker     };
7033*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X32_OPERATORS
7034*4bdc9457SAndroid Build Coastguard Worker 
7035*4bdc9457SAndroid Build Coastguard Worker   /**************************** XX WAsm micro-kernels ***************************/
7036*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_XX_OPERATORS
7037*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_XX;
7038*4bdc9457SAndroid Build Coastguard Worker 
7039*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
7040*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.fill = (struct fill_parameters) {
7041*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__scalar_x16,
7042*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
7043*4bdc9457SAndroid Build Coastguard Worker     };
7044*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.pad = (struct pad_parameters) {
7045*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__scalar,
7046*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
7047*4bdc9457SAndroid Build Coastguard Worker     };
7048*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.transpose = (struct transpose_parameters) {
7049*4bdc9457SAndroid Build Coastguard Worker       .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
7050*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7051*4bdc9457SAndroid Build Coastguard Worker     };
7052*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_XX_OPERATORS
7053*4bdc9457SAndroid Build Coastguard Worker 
7054*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_RISCV
7055*4bdc9457SAndroid Build Coastguard Worker 
7056*4bdc9457SAndroid Build Coastguard Worker   /************************** QC8 RISC-V micro-kernels **************************/
7057*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QC8_OPERATORS
7058*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QC8;
7059*4bdc9457SAndroid Build Coastguard Worker 
7060*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7061*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7062*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7063*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7064*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params;
7065*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.mr = 3;
7066*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.gemm.nr = 4;
7067*4bdc9457SAndroid Build Coastguard Worker 
7068*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x3__scalar_lrintf;
7069*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params;
7070*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].channel_tile = 2;
7071*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[0].primary_tile = 3;
7072*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf;
7073*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params;
7074*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].channel_tile = 2;
7075*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[1].primary_tile = 9;
7076*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf;
7077*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].init.qc8 = xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params;
7078*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].channel_tile = 2;
7079*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qc8.dwconv[2].primary_tile = 25;
7080*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QC8_OPERATORS
7081*4bdc9457SAndroid Build Coastguard Worker 
7082*4bdc9457SAndroid Build Coastguard Worker   /************************** QS8 RISC-V micro-kernels **************************/
7083*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QS8_OPERATORS
7084*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QS8;
7085*4bdc9457SAndroid Build Coastguard Worker 
7086*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7087*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7088*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7089*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7090*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params;
7091*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.mr = 3;
7092*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gemm.nr = 4;
7093*4bdc9457SAndroid Build Coastguard Worker 
7094*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf;
7095*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params;
7096*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].channel_tile = 2;
7097*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[0].primary_tile = 9;
7098*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf;
7099*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params;
7100*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].channel_tile = 2;
7101*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.dwconv[1].primary_tile = 25;
7102*4bdc9457SAndroid Build Coastguard Worker 
7103*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.gavgpool = (struct gavgpool_parameters) {
7104*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c1,
7105*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1,
7106*4bdc9457SAndroid Build Coastguard Worker       .init.qs8 = xnn_init_qs8_avgpool_minmax_fp32_scalar_imagic_params,
7107*4bdc9457SAndroid Build Coastguard Worker       .update.qs8 = xnn_update_qs8_avgpool_minmax_fp32_scalar_imagic_params,
7108*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
7109*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7110*4bdc9457SAndroid Build Coastguard Worker     };
7111*4bdc9457SAndroid Build Coastguard Worker 
7112*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vadd = (struct vbinary_parameters) {
7113*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vadd_minmax_ukernel__scalar_x4,
7114*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x4,
7115*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vaddc_minmax_ukernel__scalar_x4,
7116*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_add = xnn_init_qs8_add_minmax_scalar_params,
7117*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7118*4bdc9457SAndroid Build Coastguard Worker     };
7119*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.vmul = (struct vbinary_parameters) {
7120*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
7121*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
7122*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
7123*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params,
7124*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7125*4bdc9457SAndroid Build Coastguard Worker     };
7126*4bdc9457SAndroid Build Coastguard Worker 
7127*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qs8.lrelu = (struct vunary_parameters) {
7128*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vlrelu_ukernel__scalar_andxor_x4,
7129*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_andxor_params,
7130*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7131*4bdc9457SAndroid Build Coastguard Worker     };
7132*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QS8_OPERATORS
7133*4bdc9457SAndroid Build Coastguard Worker 
7134*4bdc9457SAndroid Build Coastguard Worker   /************************** QU8 RISC-V micro-kernels **************************/
7135*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_QU8_OPERATORS
    // Compiled only when XNN_NO_QU8_OPERATORS is not defined. Sets
    // XNN_INIT_FLAG_QU8 in init_flags and fills xnn_params.qu8 with scalar
    // micro-kernels, each stored through an explicit function-pointer cast.
    // Struct members not named in a compound literal below are zero-initialized
    // (standard C initializer semantics).
7136*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_QU8;
7137*4bdc9457SAndroid Build Coastguard Worker 
    // GEMM/IGEMM: the 3x4 and 1x4 kernels are registered at
    // XNN_MR_TO_INDEX(3) and XNN_MR_TO_INDEX(1); mr = 3 and nr = 4 agree with
    // the 3x4 kernel name.
7138*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7139*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf);
7140*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7141*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf);
7142*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params;
7143*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.mr = 3;
7144*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gemm.nr = 4;
7145*4bdc9457SAndroid Build Coastguard Worker 
    // Depthwise convolution: two entries with primary_tile 9 (up2x9 kernel)
    // and 25 (up2x25 kernel), both with channel_tile 2. They use the same
    // params-init function as the GEMM entry above.
7146*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf;
7147*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params;
7148*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].channel_tile = 2;
7149*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[0].primary_tile = 9;
7150*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf;
7151*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params;
7152*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].channel_tile = 2;
7153*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.dwconv[1].primary_tile = 25;
7154*4bdc9457SAndroid Build Coastguard Worker 
    // Average pooling: 9x unipass and 9p8x multipass kernels, recorded as
    // primary_tile 9 and incremental_tile 8, one channel per tile.
7155*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.avgpool = (struct avgpool_parameters) {
7156*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1,
7157*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1,
7158*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_scalar_params,
7159*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
7160*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
7161*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7162*4bdc9457SAndroid Build Coastguard Worker     };
    // Global average pooling: 7x unipass and 7p7x multipass kernels with
    // row_tile 7. Unlike avgpool, this entry registers an .update function in
    // addition to .init.
7163*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.gavgpool = (struct gavgpool_parameters) {
7164*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__scalar_imagic_c1,
7165*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1,
7166*4bdc9457SAndroid Build Coastguard Worker       .init.qu8 = xnn_init_qu8_avgpool_minmax_fp32_scalar_imagic_params,
7167*4bdc9457SAndroid Build Coastguard Worker       .update.qu8 = xnn_update_qu8_avgpool_minmax_fp32_scalar_imagic_params,
7168*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
7169*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7170*4bdc9457SAndroid Build Coastguard Worker     };
7171*4bdc9457SAndroid Build Coastguard Worker 
    // Element-wise add / multiply, 4 elements per tile. In both entries
    // ropc_ukernel is the same kernel as opc_ukernel -- presumably because the
    // operation is commutative; confirm against the operator code.
7172*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vadd = (struct vbinary_parameters) {
7173*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vadd_minmax_ukernel__scalar_x4,
7174*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
7175*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
7176*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_add = xnn_init_qu8_add_minmax_scalar_params,
7177*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7178*4bdc9457SAndroid Build Coastguard Worker     };
7179*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.vmul = (struct vbinary_parameters) {
7180*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4,
7181*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
7182*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4,
7183*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params,
7184*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7185*4bdc9457SAndroid Build Coastguard Worker     };
7186*4bdc9457SAndroid Build Coastguard Worker 
    // Leaky ReLU: x4 scalar "andxor" kernel with its matching params-init
    // function, 4 elements per tile.
7187*4bdc9457SAndroid Build Coastguard Worker     xnn_params.qu8.lrelu = (struct vunary_parameters) {
7188*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vlrelu_ukernel__scalar_andxor_x4,
7189*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_andxor_params,
7190*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7191*4bdc9457SAndroid Build Coastguard Worker     };
7192*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_QU8_OPERATORS
7193*4bdc9457SAndroid Build Coastguard Worker 
7194*4bdc9457SAndroid Build Coastguard Worker   /************************** S8 RISC-V micro-kernels ***************************/
7195*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_S8_OPERATORS
    // Compiled only when XNN_NO_S8_OPERATORS is not defined. Sets
    // XNN_INIT_FLAG_S8 in init_flags and registers the scalar clamp,
    // indirect-bilinear and max-pooling kernels in xnn_params.s8.
7196*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_S8;
7197*4bdc9457SAndroid Build Coastguard Worker 
    // clamp and maxpool below are given the same params-init function,
    // xnn_init_s8_minmax_scalar_params, through differently named init members.
7198*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.clamp = (struct vunary_parameters) {
7199*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_s8_vclamp_ukernel__scalar_x4,
7200*4bdc9457SAndroid Build Coastguard Worker       .init.s8_minmax = xnn_init_s8_minmax_scalar_params,
7201*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7202*4bdc9457SAndroid Build Coastguard Worker     };
7203*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.ibilinear = (struct ibilinear_parameters) {
7204*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_s8_ibilinear_ukernel__scalar_c1,
7205*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
7206*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7207*4bdc9457SAndroid Build Coastguard Worker     };
    // mr = 9 / qr = 8 line up with the "9p8x" in the kernel name; presumably
    // the first-pass and subsequent-pass pooling element counts -- confirm.
7208*4bdc9457SAndroid Build Coastguard Worker     xnn_params.s8.maxpool = (struct maxpool_parameters) {
7209*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_s8_maxpool_minmax_ukernel_9p8x__scalar_c1,
7210*4bdc9457SAndroid Build Coastguard Worker       .init.s8 = xnn_init_s8_minmax_scalar_params,
7211*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
7212*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
7213*4bdc9457SAndroid Build Coastguard Worker     };
7214*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_S8_OPERATORS
7215*4bdc9457SAndroid Build Coastguard Worker 
7216*4bdc9457SAndroid Build Coastguard Worker   /************************** U8 RISC-V micro-kernels ***************************/
7217*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_U8_OPERATORS
    // Compiled only when XNN_NO_U8_OPERATORS is not defined. Sets
    // XNN_INIT_FLAG_U8 in init_flags and registers the scalar clamp,
    // indirect-bilinear, max-pooling, lut32norm and rmax kernels in
    // xnn_params.u8.
7218*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_U8;
7219*4bdc9457SAndroid Build Coastguard Worker 
    // clamp and maxpool below are given the same params-init function,
    // xnn_init_u8_minmax_scalar_params, through differently named init members.
7220*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.clamp = (struct vunary_parameters) {
7221*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_u8_vclamp_ukernel__scalar_x4,
7222*4bdc9457SAndroid Build Coastguard Worker       .init.u8_minmax = xnn_init_u8_minmax_scalar_params,
7223*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7224*4bdc9457SAndroid Build Coastguard Worker     };
7225*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.ibilinear = (struct ibilinear_parameters) {
7226*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_u8_ibilinear_ukernel__scalar_c1,
7227*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
7228*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7229*4bdc9457SAndroid Build Coastguard Worker     };
    // mr = 9 / qr = 8 line up with the "9p8x" in the kernel name; presumably
    // the first-pass and subsequent-pass pooling element counts -- confirm.
7230*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.maxpool = (struct maxpool_parameters) {
7231*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1,
7232*4bdc9457SAndroid Build Coastguard Worker       .init.u8 = xnn_init_u8_minmax_scalar_params,
7233*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
7234*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
7235*4bdc9457SAndroid Build Coastguard Worker     };
    // These two kernels are stored directly, without the explicit
    // function-pointer cast used for the entries above.
7236*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
7237*4bdc9457SAndroid Build Coastguard Worker     xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
7238*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_U8_OPERATORS
7239*4bdc9457SAndroid Build Coastguard Worker 
7240*4bdc9457SAndroid Build Coastguard Worker   /************************** X8 RISC-V micro-kernels ***************************/
7241*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X8_OPERATORS
    // Compiled only when XNN_NO_X8_OPERATORS is not defined. Sets
    // XNN_INIT_FLAG_X8 in init_flags and registers the scalar lookup-table,
    // zip and transpose kernels in xnn_params.x8.
7242*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X8;
7243*4bdc9457SAndroid Build Coastguard Worker 
7244*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar_x4;
    // The x2/x3/x4 zip kernels are cast to xnn_zipc_ukernel_function, while
    // the xm kernel is cast to the separate xnn_zipv_ukernel_function type.
7245*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.zip = (struct zip_parameters) {
7246*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
7247*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
7248*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
7249*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
7250*4bdc9457SAndroid Build Coastguard Worker     };
7251*4bdc9457SAndroid Build Coastguard Worker 
    // Only const_size_ukernel and tile_size are named; any other members of
    // struct transpose_parameters are zero-initialized by the compound literal.
7252*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x8.transpose = (struct transpose_parameters) {
7253*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x8_transposec_ukernel__2x4_scalar_int,
7254*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7255*4bdc9457SAndroid Build Coastguard Worker     };
7256*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X8_OPERATORS
7257*4bdc9457SAndroid Build Coastguard Worker 
7258*4bdc9457SAndroid Build Coastguard Worker   /************************** X16 RISC-V micro-kernels ***************************/
7259*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X16_OPERATORS
    // Compiled only when XNN_NO_X16_OPERATORS is not defined. Sets
    // XNN_INIT_FLAG_X16 in init_flags and registers the only X16 entry in this
    // section: the scalar 2x4 transpose kernel with tile_size 32.
7260*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X16;
7261*4bdc9457SAndroid Build Coastguard Worker 
7262*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x16.transpose = (struct transpose_parameters) {
7263*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x16_transposec_ukernel__2x4_scalar_int,
7264*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7265*4bdc9457SAndroid Build Coastguard Worker     };
7266*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X16_OPERATORS
7267*4bdc9457SAndroid Build Coastguard Worker 
7268*4bdc9457SAndroid Build Coastguard Worker   /************************** F32 RISC-V micro-kernels **************************/
7269*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_F32_OPERATORS
7270*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_F32;
7271*4bdc9457SAndroid Build Coastguard Worker 
7272*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x4__scalar);
7273*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x4__scalar);
7274*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_1x4__scalar);
7275*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_1x4__scalar);
7276*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_4x4__scalar);
7277*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_4x4__scalar);
7278*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.relu.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_relu_ukernel_1x4__scalar);
7279*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.relu.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_relu_ukernel_1x4__scalar);
7280*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar);
7281*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar);
7282*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.linear.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar);
7283*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.linear.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar);
7284*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.init.f32 = xnn_init_f32_minmax_scalar_params;
7285*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.mr = 4;
7286*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm.nr = 4;
7287*4bdc9457SAndroid Build Coastguard Worker 
7288*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_minmax_ukernel_4x2__scalar);
7289*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_minmax_ukernel_4x2__scalar);
7290*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.linear.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x2__scalar);
7291*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.linear.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar);
7292*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.init.f32 = xnn_init_f32_minmax_scalar_params;
7293*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.mr = 4;
7294*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gemm2.nr = 2;
7295*4bdc9457SAndroid Build Coastguard Worker 
7296*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x3__scalar_acc2;
7297*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x3__scalar_acc2;
7298*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].init.f32 = xnn_init_f32_minmax_scalar_params;
7299*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].channel_tile = 1;
7300*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[0].primary_tile = 3;
7301*4bdc9457SAndroid Build Coastguard Worker 
7302*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x4__scalar_acc2;
7303*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2;
7304*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].init.f32 = xnn_init_f32_minmax_scalar_params;
7305*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].channel_tile = 1;
7306*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[1].primary_tile = 4;
7307*4bdc9457SAndroid Build Coastguard Worker 
7308*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2;
7309*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2;
7310*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].init.f32 = xnn_init_f32_minmax_scalar_params;
7311*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].channel_tile = 1;
7312*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[2].primary_tile = 9;
7313*4bdc9457SAndroid Build Coastguard Worker 
7314*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2;
7315*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].linear.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2;
7316*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].init.f32 = xnn_init_f32_minmax_scalar_params;
7317*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].channel_tile = 1;
7318*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.dwconv[3].primary_tile = 25;
7319*4bdc9457SAndroid Build Coastguard Worker 
7320*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.avgpool = (struct avgpool_parameters) {
7321*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_avgpool_unipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9x__scalar_c1,
7322*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_avgpool_multipass_ukernel_function) xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1,
7323*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
7324*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
7325*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
7326*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7327*4bdc9457SAndroid Build Coastguard Worker     };
7328*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
7329*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_pavgpool_unipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9x__scalar_c1,
7330*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_pavgpool_multipass_ukernel_function) xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1,
7331*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
7332*4bdc9457SAndroid Build Coastguard Worker       .primary_tile = 9,
7333*4bdc9457SAndroid Build Coastguard Worker       .incremental_tile = 8,
7334*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7335*4bdc9457SAndroid Build Coastguard Worker     };
7336*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
7337*4bdc9457SAndroid Build Coastguard Worker       .unipass = (xnn_gavgpool_unipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7x__scalar_c1,
7338*4bdc9457SAndroid Build Coastguard Worker       .multipass = (xnn_gavgpool_multipass_ukernel_function) xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1,
7339*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_scaleminmax_scalar_params,
7340*4bdc9457SAndroid Build Coastguard Worker       .update.f32 = xnn_update_f32_scaleminmax_scalar_params,
7341*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 7,
7342*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7343*4bdc9457SAndroid Build Coastguard Worker     };
7344*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.maxpool = (struct maxpool_parameters) {
7345*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_minmax_ukernel_9p8x__scalar_c1,
7346*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
7347*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
7348*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
7349*4bdc9457SAndroid Build Coastguard Worker     };
7350*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
7351*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
7352*4bdc9457SAndroid Build Coastguard Worker       .mr = 4,
7353*4bdc9457SAndroid Build Coastguard Worker     };
7354*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
7355*4bdc9457SAndroid Build Coastguard Worker       .up = (xnn_argmaxpool_unipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9x__scalar_c1,
7356*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
7357*4bdc9457SAndroid Build Coastguard Worker     };
7358*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
7359*4bdc9457SAndroid Build Coastguard Worker       .mp = (xnn_argmaxpool_multipass_ukernel_function) xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
7360*4bdc9457SAndroid Build Coastguard Worker       .mr = 9,
7361*4bdc9457SAndroid Build Coastguard Worker       .qr = 8,
7362*4bdc9457SAndroid Build Coastguard Worker     };
7363*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
7364*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__scalar_c2,
7365*4bdc9457SAndroid Build Coastguard Worker       .pixel_tile = 1,
7366*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 2,
7367*4bdc9457SAndroid Build Coastguard Worker     };
7368*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.abs = (struct vunary_parameters) {
7369*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vabs_ukernel__scalar_x4,
7370*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7371*4bdc9457SAndroid Build Coastguard Worker     };
7372*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.clamp = (struct vunary_parameters) {
7373*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vclamp_ukernel__scalar_x4,
7374*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
7375*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7376*4bdc9457SAndroid Build Coastguard Worker     };
7377*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.elu = (struct vunary_parameters) {
7378*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
7379*4bdc9457SAndroid Build Coastguard Worker       .init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params,
7380*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7381*4bdc9457SAndroid Build Coastguard Worker     };
7382*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.hswish = (struct vunary_parameters) {
7383*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vhswish_ukernel__scalar_x4,
7384*4bdc9457SAndroid Build Coastguard Worker       .init.f32_hswish = xnn_init_f32_hswish_scalar_params,
7385*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7386*4bdc9457SAndroid Build Coastguard Worker     };
7387*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.lrelu = (struct vunary_parameters) {
7388*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vlrelu_ukernel__scalar_x4,
7389*4bdc9457SAndroid Build Coastguard Worker       .init.f32_lrelu = xnn_init_f32_lrelu_scalar_params,
7390*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7391*4bdc9457SAndroid Build Coastguard Worker     };
7392*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.neg = (struct vunary_parameters) {
7393*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vneg_ukernel__scalar_x4,
7394*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7395*4bdc9457SAndroid Build Coastguard Worker     };
7396*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndne = (struct vunary_parameters) {
7397*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndne_ukernel__scalar_libm_x1,
7398*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7399*4bdc9457SAndroid Build Coastguard Worker     };
7400*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndz = (struct vunary_parameters) {
7401*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndz_ukernel__scalar_libm_x1,
7402*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7403*4bdc9457SAndroid Build Coastguard Worker     };
7404*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndu = (struct vunary_parameters) {
7405*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndu_ukernel__scalar_libm_x1,
7406*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7407*4bdc9457SAndroid Build Coastguard Worker     };
7408*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rndd = (struct vunary_parameters) {
7409*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vrndd_ukernel__scalar_libm_x1,
7410*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7411*4bdc9457SAndroid Build Coastguard Worker     };
7412*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sigmoid = (struct vunary_parameters) {
7413*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
7414*4bdc9457SAndroid Build Coastguard Worker       .init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params,
7415*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 2,
7416*4bdc9457SAndroid Build Coastguard Worker     };
7417*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqr = (struct vunary_parameters) {
7418*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqr_ukernel__scalar_x4,
7419*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7420*4bdc9457SAndroid Build Coastguard Worker     };
7421*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.sqrt = (struct vunary_parameters) {
7422*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1,
7423*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 1,
7424*4bdc9457SAndroid Build Coastguard Worker     };
7425*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.prelu = (struct prelu_parameters) {
7426*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
7427*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 4,
7428*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 4,
7429*4bdc9457SAndroid Build Coastguard Worker     };
7430*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.raddstoreexpminusmax = (struct raddstoreexpminusmax_parameters) {
7431*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_raddstoreexpminusmax_ukernel_function) xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x4_acc2,
7432*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_expminus_scalar_rr2_p5_params,
7433*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7434*4bdc9457SAndroid Build Coastguard Worker     };
7435*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.rmax = (xnn_rmax_ukernel_function) xnn_f32_rmax_ukernel__scalar;
7436*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vadd = (struct vbinary_parameters) {
7437*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vadd_minmax_ukernel__scalar_x8,
7438*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__scalar_x8,
7439*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_minmax_ukernel__scalar_x8,
7440*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
7441*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7442*4bdc9457SAndroid Build Coastguard Worker     };
7443*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vdiv = (struct vbinary_parameters) {
7444*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_minmax_ukernel__scalar_x2,
7445*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_minmax_ukernel__scalar_x2,
7446*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_minmax_ukernel__scalar_x2,
7447*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
7448*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 2,
7449*4bdc9457SAndroid Build Coastguard Worker     };
7450*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmax = (struct vbinary_parameters) {
7451*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__scalar_x8,
7452*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__scalar_x8,
7453*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__scalar_x8,
7454*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7455*4bdc9457SAndroid Build Coastguard Worker     };
7456*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmin = (struct vbinary_parameters) {
7457*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmin_ukernel__scalar_x8,
7458*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__scalar_x8,
7459*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vminc_ukernel__scalar_x8,
7460*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7461*4bdc9457SAndroid Build Coastguard Worker     };
7462*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmul = (struct vbinary_parameters) {
7463*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmul_minmax_ukernel__scalar_x8,
7464*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__scalar_x8,
7465*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_minmax_ukernel__scalar_x8,
7466*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
7467*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7468*4bdc9457SAndroid Build Coastguard Worker     };
7469*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsub = (struct vbinary_parameters) {
7470*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_minmax_ukernel__scalar_x8,
7471*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_minmax_ukernel__scalar_x8,
7472*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_minmax_ukernel__scalar_x8,
7473*4bdc9457SAndroid Build Coastguard Worker       .init.f32_minmax = xnn_init_f32_minmax_scalar_params,
7474*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7475*4bdc9457SAndroid Build Coastguard Worker     };
7476*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vsqrdiff = (struct vbinary_parameters) {
7477*4bdc9457SAndroid Build Coastguard Worker       .minmax.op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiff_ukernel__scalar_x8,
7478*4bdc9457SAndroid Build Coastguard Worker       .minmax.opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
7479*4bdc9457SAndroid Build Coastguard Worker       .minmax.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsqrdiffc_ukernel__scalar_x8,
7480*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 8,
7481*4bdc9457SAndroid Build Coastguard Worker     };
7482*4bdc9457SAndroid Build Coastguard Worker     xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
7483*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x,
7484*4bdc9457SAndroid Build Coastguard Worker       .init.f32 = xnn_init_f32_minmax_scalar_params,
7485*4bdc9457SAndroid Build Coastguard Worker       .channel_tile = 1,
7486*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 2,
7487*4bdc9457SAndroid Build Coastguard Worker     };
7488*4bdc9457SAndroid Build Coastguard Worker     #ifndef XNN_NO_NCHW_OPERATORS
7489*4bdc9457SAndroid Build Coastguard Worker       init_flags |= XNN_INIT_FLAG_CHW_OPT;
7490*4bdc9457SAndroid Build Coastguard Worker 
7491*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm = (struct spmm_parameters) {
7492*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x1__scalar,
7493*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
7494*4bdc9457SAndroid Build Coastguard Worker         .nr = 1,
7495*4bdc9457SAndroid Build Coastguard Worker       };
7496*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm2 = (struct spmm_parameters) {
7497*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x2__scalar,
7498*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
7499*4bdc9457SAndroid Build Coastguard Worker         .nr = 2,
7500*4bdc9457SAndroid Build Coastguard Worker       };
7501*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.spmm4 = (struct spmm_parameters) {
7502*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_minmax_ukernel_8x4__scalar,
7503*4bdc9457SAndroid Build Coastguard Worker         .mr = 8,
7504*4bdc9457SAndroid Build Coastguard Worker         .nr = 4,
7505*4bdc9457SAndroid Build Coastguard Worker       };
7506*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
7507*4bdc9457SAndroid Build Coastguard Worker         .ukernel_with_symm_padding =
7508*4bdc9457SAndroid Build Coastguard Worker           (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
7509*4bdc9457SAndroid Build Coastguard Worker         .output_channel_tile = 4,
7510*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
7511*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
7512*4bdc9457SAndroid Build Coastguard Worker       };
7513*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3 = (struct dwconv2d_chw_parameters) {
7514*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2,
7515*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
7516*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 2,
7517*4bdc9457SAndroid Build Coastguard Worker       };
7518*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_3x3s2 = (struct dwconv2d_chw_parameters) {
7519*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_1x1_acc2,
7520*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
7521*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
7522*4bdc9457SAndroid Build Coastguard Worker       };
7523*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5 = (struct dwconv2d_chw_parameters) {
7524*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5,
7525*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
7526*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
7527*4bdc9457SAndroid Build Coastguard Worker       };
7528*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.dwconv2d_chw_5x5s2 = (struct dwconv2d_chw_parameters) {
7529*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_dwconv2d_chw_ukernel_function) xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5,
7530*4bdc9457SAndroid Build Coastguard Worker         .output_width_tile = 1,
7531*4bdc9457SAndroid Build Coastguard Worker         .output_height_tile = 1,
7532*4bdc9457SAndroid Build Coastguard Worker       };
7533*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
7534*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__scalar_x1,
7535*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
7536*4bdc9457SAndroid Build Coastguard Worker       };
7537*4bdc9457SAndroid Build Coastguard Worker       xnn_params.f32.ibilinear_chw = (struct ibilinear_chw_parameters) {
7538*4bdc9457SAndroid Build Coastguard Worker         .ukernel = (xnn_ibilinear_chw_ukernel_function) xnn_f32_ibilinear_chw_ukernel__scalar_p4,
7539*4bdc9457SAndroid Build Coastguard Worker         .channel_tile = 1,
7540*4bdc9457SAndroid Build Coastguard Worker         .pixel_tile = 4,
7541*4bdc9457SAndroid Build Coastguard Worker       };
7542*4bdc9457SAndroid Build Coastguard Worker     #endif  // XNN_NO_NCHW_OPERATORS
7543*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_F32_OPERATORS
7544*4bdc9457SAndroid Build Coastguard Worker 
7545*4bdc9457SAndroid Build Coastguard Worker   /************************** VCVT RISC-V micro-kernels *************************/
7546*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_VCVT_OPERATORS
7547*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_VCVT;
7548*4bdc9457SAndroid Build Coastguard Worker 
7549*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f16_to_f32 = (struct vunary_parameters) {
7550*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f16_f32_vcvt_ukernel__scalar_x4,
7551*4bdc9457SAndroid Build Coastguard Worker       .init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params,
7552*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7553*4bdc9457SAndroid Build Coastguard Worker     };
7554*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_f16 = (struct vunary_parameters) {
7555*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2,
7556*4bdc9457SAndroid Build Coastguard Worker       .init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_fabsf_params,
7557*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 2,
7558*4bdc9457SAndroid Build Coastguard Worker     };
7559*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qs8 = (struct vunary_parameters) {
7560*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qs8_vcvt_ukernel__scalar_lrintf_x4,
7561*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_lrintf_params,
7562*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7563*4bdc9457SAndroid Build Coastguard Worker     };
7564*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.f32_to_qu8 = (struct vunary_parameters) {
7565*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_f32_qu8_vcvt_ukernel__scalar_lrintf_x4,
7566*4bdc9457SAndroid Build Coastguard Worker       .init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_lrintf_params,
7567*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7568*4bdc9457SAndroid Build Coastguard Worker     };
7569*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8 = (struct vunary_parameters) {
7570*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_vcvt_ukernel__scalar_x4,
7571*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_cvt = xnn_init_qs8_cvt_scalar_params,
7572*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7573*4bdc9457SAndroid Build Coastguard Worker     };
7574*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qs8_to_f32 = (struct vunary_parameters) {
7575*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qs8_f32_vcvt_ukernel__scalar_x4,
7576*4bdc9457SAndroid Build Coastguard Worker       .init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params,
7577*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7578*4bdc9457SAndroid Build Coastguard Worker     };
7579*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8 = (struct vunary_parameters) {
7580*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_vcvt_ukernel__scalar_x4,
7581*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_cvt = xnn_init_qu8_cvt_scalar_params,
7582*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7583*4bdc9457SAndroid Build Coastguard Worker     };
7584*4bdc9457SAndroid Build Coastguard Worker     xnn_params.vcvt.qu8_to_f32 = (struct vunary_parameters) {
7585*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_vunary_ukernel_function) xnn_qu8_f32_vcvt_ukernel__scalar_x4,
7586*4bdc9457SAndroid Build Coastguard Worker       .init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params,
7587*4bdc9457SAndroid Build Coastguard Worker       .element_tile = 4,
7588*4bdc9457SAndroid Build Coastguard Worker     };
7589*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_VCVT_OPERATORS
7590*4bdc9457SAndroid Build Coastguard Worker 
7591*4bdc9457SAndroid Build Coastguard Worker   /************************** X32 RISC-V micro-kernels **************************/
7592*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_X32_OPERATORS
7593*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_X32;
7594*4bdc9457SAndroid Build Coastguard Worker 
7595*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
7596*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.zip = (struct zip_parameters) {
7597*4bdc9457SAndroid Build Coastguard Worker       .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
7598*4bdc9457SAndroid Build Coastguard Worker       .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
7599*4bdc9457SAndroid Build Coastguard Worker       .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
7600*4bdc9457SAndroid Build Coastguard Worker       .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
7601*4bdc9457SAndroid Build Coastguard Worker     };
7602*4bdc9457SAndroid Build Coastguard Worker 
7603*4bdc9457SAndroid Build Coastguard Worker     xnn_params.x32.transpose = (struct transpose_parameters) {
7604*4bdc9457SAndroid Build Coastguard Worker       .const_size_ukernel = (xnn_transposec_ukernel_function) xnn_x32_transposec_ukernel__2x4_scalar_int,
7605*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7606*4bdc9457SAndroid Build Coastguard Worker     };
7607*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_X32_OPERATORS
7608*4bdc9457SAndroid Build Coastguard Worker 
7609*4bdc9457SAndroid Build Coastguard Worker   /************************** XX RISC-V micro-kernels ***************************/
7610*4bdc9457SAndroid Build Coastguard Worker   #ifndef XNN_NO_XX_OPERATORS
7611*4bdc9457SAndroid Build Coastguard Worker     init_flags |= XNN_INIT_FLAG_XX;
7612*4bdc9457SAndroid Build Coastguard Worker 
7613*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.copy = (xnn_vunary_ukernel_function) xnn_xx_copy_ukernel__memcpy;
7614*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.fill = (struct fill_parameters) {
7615*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_fill_ukernel_function) xnn_xx_fill_ukernel__scalar_x16,
7616*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
7617*4bdc9457SAndroid Build Coastguard Worker     };
7618*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.pad = (struct pad_parameters) {
7619*4bdc9457SAndroid Build Coastguard Worker       .ukernel = (xnn_pad_ukernel_function) xnn_xx_pad_ukernel__scalar,
7620*4bdc9457SAndroid Build Coastguard Worker       .row_tile = 1,
7621*4bdc9457SAndroid Build Coastguard Worker     };
7622*4bdc9457SAndroid Build Coastguard Worker     xnn_params.xx.transpose = (struct transpose_parameters) {
7623*4bdc9457SAndroid Build Coastguard Worker       .variable_size_ukernel = xnn_xx_transposev_ukernel__1x1_memcpy,
7624*4bdc9457SAndroid Build Coastguard Worker       .tile_size = 32,
7625*4bdc9457SAndroid Build Coastguard Worker     };
7626*4bdc9457SAndroid Build Coastguard Worker   #endif  // XNN_NO_XX_OPERATORS
7627*4bdc9457SAndroid Build Coastguard Worker 
7628*4bdc9457SAndroid Build Coastguard Worker #else
7629*4bdc9457SAndroid Build Coastguard Worker   #error "Unsupported architecture"
7630*4bdc9457SAndroid Build Coastguard Worker #endif
7631*4bdc9457SAndroid Build Coastguard Worker 
7632*4bdc9457SAndroid Build Coastguard Worker   // Get page size.
7633*4bdc9457SAndroid Build Coastguard Worker   #if XNN_PLATFORM_WINDOWS
7634*4bdc9457SAndroid Build Coastguard Worker     SYSTEM_INFO sysinfo;
7635*4bdc9457SAndroid Build Coastguard Worker     GetSystemInfo(&sysinfo);
7636*4bdc9457SAndroid Build Coastguard Worker     xnn_params.page_size = sysinfo.dwPageSize;
7637*4bdc9457SAndroid Build Coastguard Worker   #else
7638*4bdc9457SAndroid Build Coastguard Worker     const long res = sysconf(_SC_PAGESIZE);
7639*4bdc9457SAndroid Build Coastguard Worker     if (res == -1) {
7640*4bdc9457SAndroid Build Coastguard Worker       xnn_log_error("failed to get page size, error code: %d", errno);
7641*4bdc9457SAndroid Build Coastguard Worker       return;
7642*4bdc9457SAndroid Build Coastguard Worker     }
7643*4bdc9457SAndroid Build Coastguard Worker     xnn_params.page_size = res;
7644*4bdc9457SAndroid Build Coastguard Worker   #endif
7645*4bdc9457SAndroid Build Coastguard Worker 
7646*4bdc9457SAndroid Build Coastguard Worker   memcpy(&xnn_params.allocator, init_allocator, sizeof(struct xnn_allocator));
7647*4bdc9457SAndroid Build Coastguard Worker   xnn_params.init_flags = init_flags;
7648*4bdc9457SAndroid Build Coastguard Worker }
7649*4bdc9457SAndroid Build Coastguard Worker 
#if XNN_PLATFORM_WINDOWS
  // Callback handed to InitOnceExecuteOnce() by xnn_initialize(): it simply
  // forwards to init() and always reports success to the one-time-init API.
  static BOOL CALLBACK init_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
    // The callback arguments are not needed; init() takes no parameters.
    (void) init_once;
    (void) parameter;
    (void) context;

    init();
    return TRUE;
  }
#endif
7656*4bdc9457SAndroid Build Coastguard Worker 
xnn_initialize(const struct xnn_allocator * allocator)7657*4bdc9457SAndroid Build Coastguard Worker enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
7658*4bdc9457SAndroid Build Coastguard Worker   #if !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV
7659*4bdc9457SAndroid Build Coastguard Worker     if (!cpuinfo_initialize()) {
7660*4bdc9457SAndroid Build Coastguard Worker       return xnn_status_out_of_memory;
7661*4bdc9457SAndroid Build Coastguard Worker     }
7662*4bdc9457SAndroid Build Coastguard Worker   #endif  // !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV
7663*4bdc9457SAndroid Build Coastguard Worker   if (allocator == NULL) {
7664*4bdc9457SAndroid Build Coastguard Worker     allocator = &xnn_default_allocator;
7665*4bdc9457SAndroid Build Coastguard Worker   }
7666*4bdc9457SAndroid Build Coastguard Worker   #ifdef _MSC_VER
7667*4bdc9457SAndroid Build Coastguard Worker     _InterlockedCompareExchangePointer((PVOID volatile*) &init_allocator, (PVOID) allocator, NULL);
7668*4bdc9457SAndroid Build Coastguard Worker   #else
7669*4bdc9457SAndroid Build Coastguard Worker     __sync_bool_compare_and_swap(&init_allocator, NULL, allocator);
7670*4bdc9457SAndroid Build Coastguard Worker   #endif
7671*4bdc9457SAndroid Build Coastguard Worker   #if XNN_PLATFORM_WINDOWS
7672*4bdc9457SAndroid Build Coastguard Worker     InitOnceExecuteOnce(&init_guard, &init_windows, NULL, NULL);
7673*4bdc9457SAndroid Build Coastguard Worker   #else
7674*4bdc9457SAndroid Build Coastguard Worker     pthread_once(&init_guard, &init);
7675*4bdc9457SAndroid Build Coastguard Worker   #endif
7676*4bdc9457SAndroid Build Coastguard Worker   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) != 0) {
7677*4bdc9457SAndroid Build Coastguard Worker     return xnn_status_success;
7678*4bdc9457SAndroid Build Coastguard Worker   } else {
7679*4bdc9457SAndroid Build Coastguard Worker     return xnn_status_unsupported_hardware;
7680*4bdc9457SAndroid Build Coastguard Worker   }
7681*4bdc9457SAndroid Build Coastguard Worker }
7682*4bdc9457SAndroid Build Coastguard Worker 
xnn_deinitialize(void)7683*4bdc9457SAndroid Build Coastguard Worker enum xnn_status xnn_deinitialize(void) {
7684*4bdc9457SAndroid Build Coastguard Worker   #if !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV
7685*4bdc9457SAndroid Build Coastguard Worker     cpuinfo_deinitialize();
7686*4bdc9457SAndroid Build Coastguard Worker   #endif  // !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV
7687*4bdc9457SAndroid Build Coastguard Worker   return xnn_status_success;
7688*4bdc9457SAndroid Build Coastguard Worker }
7689