// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemminc-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the kernel once with m = 1, n = 8, k = 8 and no stride overrides.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
40
// m = 1, n = 8, k = 8 with the tester's cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
54
// m = 1, n = 8, k = 8 with the tester's a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
68
// Sweeps n over 1..8 and m over 1..1 at k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
86
// Sweeps m over 1..1 with n = 8, k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
102
// Sweeps n over 1..8 with m = 1, k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
118
// Runs the kernel once with m = 1, n = 8, k = 16 and no stride overrides.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
131
// m = 1, n = 8, k = 16 with the tester's a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
145
// Sweeps n over 1..8 and m over 1..1 at k = 16, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
163
// Sweeps k over 1..15 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
178
// Sweeps k over 1..15 with m = 1, n = 8 and the tester's a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
194
// Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
214
// Sweeps k over 17..31 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
229
// Sweeps k over 17..31 with m = 1, n = 8 and the tester's a_stride set to 37.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
245
// Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
265
// Sweeps k over 24, 32, ..., 80 (step 8) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
280
// Sweeps k over 24, 32, ..., 80 (step 8) with m = 1, n = 8 and a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
296
// Sweeps k over 24, 32, ..., 80, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
316
// Sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
333
// Sweeps n over 9..15 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
351
// Sweeps n over 9..15 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
369
// Sweeps n over 9..15, k over 1, 10, ..., 37 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
389
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
406
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
424
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
442
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
462
// Sweeps k over 1, 10, ..., 37, n over 1..8 and m over 1..1 with cm_stride set to 11,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
483
// m = 1, n = 8, k = 8 with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
497
// m = 1, n = 8, k = 8 with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
511
// m = 1, n = 8, k = 8 with the tester's cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
525 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
526
527
528 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the kernel once with m = 1, n = 8, k = 8 and no stride overrides.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
541
// m = 1, n = 8, k = 8 with the tester's cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
555
// m = 1, n = 8, k = 8 with the tester's a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
569
// Sweeps n over 1..8 and m over 1..1 at k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
587
// Sweeps m over 1..1 with n = 8, k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
603
// Sweeps n over 1..8 with m = 1, k = 8, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
619
// Runs the kernel once with m = 1, n = 8, k = 16 and no stride overrides.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
632
// m = 1, n = 8, k = 16 with the tester's a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
646
// Sweeps n over 1..8 and m over 1..1 at k = 16, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
664
// Sweeps k over 1..15 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
679
// Sweeps k over 1..15 with m = 1, n = 8 and the tester's a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
695
// Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
715
// Sweeps k over 17..31 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
730
// Sweeps k over 17..31 with m = 1, n = 8 and the tester's a_stride set to 37.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
746
// Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
766
// Sweeps k over 24, 32, ..., 80 (step 8) with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
781
// Sweeps k over 24, 32, ..., 80 (step 8) with m = 1, n = 8 and a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
797
// Sweeps k over 24, 32, ..., 80, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 24; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
817
// Sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
834
// Sweeps n over 9..15 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
852
// Sweeps n over 9..15 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
870
// Sweeps n over 9..15, k over 1, 10, ..., 37 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
890
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 (step 9) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
907
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
925
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 1 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
943
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 and m over 1..1, one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
963
// Sweeps k over 1, 10, ..., 37, n over 1..8 and m over 1..1 with cm_stride set to 11,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
984
// m = 1, n = 8, k = 8 with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
998
// m = 1, n = 8, k = 8 with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1012
// m = 1, n = 8, k = 8 with the tester's cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1026 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1027
1028
1029 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the kernel once with m = 1, n = 12, k = 4 and no stride overrides.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(12)
    .kr(1)
    .sr(1)
    .m(1)
    .n(12)
    .k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1042
// m = 1, n = 12, k = 4 with the tester's cn_stride set to 17.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(12)
    .kr(1)
    .sr(1)
    .m(1)
    .n(12)
    .k(4)
    .cn_stride(17)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1056
// m = 1, n = 12, k = 4 with the tester's a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1)
    .nr(12)
    .kr(1)
    .sr(1)
    .m(1)
    .n(12)
    .k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
1070
// Sweeps n over 1..12 and m over 1..1 at k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 12; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(12)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1088
// Sweeps m over [1, 1] with n = 12 and k = 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(m_val).n(12).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1104
// Sweeps n over [1, 12] with m = 1 and k = 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1120
// Single run with m = 1, n = 12 and k = 8.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1133
// Single run with m = 1, n = 12, k = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1147
// Sweeps n over [1, 12] and m over [1, 1] at k = 8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1165
// Sweeps k over [1, 7] with m = 1 and n = 12.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1180
// Sweeps k over [1, 7] with m = 1, n = 12 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1196
// Sweeps k over [1, 7], n over [1, 12] and m over [1, 1], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1216
// Sweeps k over [9, 15] with m = 1 and n = 12.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1231
// Sweeps k over [9, 15] with m = 1, n = 12 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .a_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1247
// Sweeps k over [9, 15], n over [1, 12] and m over [1, 1], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1267
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 1 and n = 12.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1282
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 1, n = 12 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(12).kr(1).sr(1)
        .m(1).n(12).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
1298
// Sweeps k over 12, 16, ..., 40 (step 4), n over [1, 12] and m over [1, 1],
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1318
// Sweeps n over [13, 23] and k over 1, 6, 11, 16 (step 5) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1335
// Sweeps n over [13, 23] and k over 1, 6, 11, 16 (step 5) with m = 1 and
// cn_stride set to 17.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(17)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1353
// Sweeps n over [13, 23] and k over 1, 6, 11, 16 (step 5) with m = 1 and
// a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1371
// Sweeps n over [13, 23], k over 1, 6, 11, 16 (step 5) and m over [1, 1],
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1391
// Sweeps n over 24, 36 (step 12) and k over 1, 6, 11, 16 (step 5) with m = 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 24; n_val <= 36; n_val += 12) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1408
// Sweeps n over 24, 36 (step 12) and k over 1, 6, 11, 16 (step 5) with m = 1
// and cn_stride set to 17.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 24; n_val <= 36; n_val += 12) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(17)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1426
// Sweeps n over 24, 36 (step 12) and k over 1, 6, 11, 16 (step 5) with m = 1
// and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 24; n_val <= 36; n_val += 12) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(12).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1444
// Sweeps n over 24, 36 (step 12), k over 1, 6, 11, 16 (step 5) and m over
// [1, 1], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 24; n_val <= 36; n_val += 12) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1464
// Sweeps k over 1, 6, 11, 16 (step 5), n over [1, 12] and m over [1, 1] with
// cm_stride set to 17 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(17)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1485
// Single run with m = 1, n = 12, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1499
// Single run with m = 1, n = 12, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1513
// Single run with m = 1, n = 12, k = 4 and cm_stride set to 17.
TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(12).kr(1).sr(1)
      .m(1).n(12).k(4)
      .cm_stride(17)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
1527 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1528
1529
1530 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = mr (4), n = nr (8) and k = 4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
1543
// Single run with m = 4, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
1557
// Single run with m = 4, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
1571
// Sweeps n over [1, 8] and m over [1, 4] at k = 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1589
// Sweeps m over [1, 4] with n = 8 and k = 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1605
// Sweeps n over [1, 8] with m = 4 and k = 4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1621
// Single run with m = 4, n = 8 and k = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
1634
// Single run with m = 4, n = 8, k = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
1648
// Sweeps n over [1, 8] and m over [1, 4] at k = 8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1666
// Sweeps k over [1, 7] with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1681
// Sweeps k over [1, 7] with m = 4, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1697
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1717
// Sweeps k over [9, 15] with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1732
// Sweeps k over [9, 15] with m = 4, n = 8 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1748
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1768
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1783
// Sweeps k over 12, 16, ..., 40 (step 4) with m = 4, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
1799
// Sweeps k over 12, 16, ..., 40 (step 4), n over [1, 8] and m over [1, 4],
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1819
// Sweeps n over [9, 15] and k over 1, 6, 11, 16 (step 5) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1836
// Sweeps n over [9, 15] and k over 1, 6, 11, 16 (step 5) with m = 4 and
// cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1854
// Sweeps n over [9, 15] and k over 1, 6, 11, 16 (step 5) with m = 4 and
// a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1872
// Sweeps n over [9, 15], k over 1, 6, 11, 16 (step 5) and m over [1, 4],
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1892
// Sweeps n over 16, 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1909
// Sweeps n over 16, 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m = 4
// and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1927
// Sweeps n over 16, 24 (step 8) and k over 1, 6, 11, 16 (step 5) with m = 4
// and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
1945
// Sweeps n over 16, 24 (step 8), k over 1, 6, 11, 16 (step 5) and m over
// [1, 4], with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1965
// Sweeps k over 1, 6, 11, 16 (step 5), n over [1, 8] and m over [1, 4] with
// cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1986
// Single run with m = 4, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
2000
// Single run with m = 4, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
2014
// Single run with m = 4, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
2028 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2029
2030
2031 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = mr (4), n = nr (8) and k = 8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2044
// Single run with m = 4, n = 8, k = 8 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2058
// Single run with m = 4, n = 8, k = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2072
// Sweeps n over [1, 8] and m over [1, 4] at k = 8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
2090
// Sweeps m over [1, 4] with n = 8 and k = 8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2106
// Sweeps n over [1, 8] with m = 4 and k = 8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
2122
// Single run with m = 4, n = 8 and k = 16.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(16)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2135
// Single run with m = 4, n = 8, k = 16 and a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(16)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
2149
// Cortex-A75 4x8 kernel: k=16, every (n, m) pair with n in 1..8 and m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2167
// Cortex-A75 4x8 kernel on a full 4x8 tile, with k swept over 1..15.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2182
// Cortex-A75 4x8 kernel on a full 4x8 tile, k swept over 1..15, a_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2198
// Cortex-A75 4x8 kernel: k in 1..15, n in 1..8, m in 1..4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2218
// Cortex-A75 4x8 kernel on a full 4x8 tile, with k swept over 17..31.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2233
// Cortex-A75 4x8 kernel on a full 4x8 tile, k swept over 17..31, a_stride set to 37.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(37)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2249
// Cortex-A75 4x8 kernel: k in 17..31, n in 1..8, m in 1..4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2269
// Cortex-A75 4x8 kernel on a full 4x8 tile, with k stepping 24, 32, ..., 80.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2284
// Cortex-A75 4x8 kernel on a full 4x8 tile, k stepping 24, 32, ..., 80,
// with a_stride set to 83.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(83)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2300
// Cortex-A75 4x8 kernel: k stepping 24, 32, ..., 80, n in 1..8, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 24; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2320
// Cortex-A75 4x8 kernel: m=4, n in 9..15, k stepping 1, 10, ..., 37.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2337
// Cortex-A75 4x8 kernel: m=4, n in 9..15, k stepping 1, 10, ..., 37,
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2355
// Cortex-A75 4x8 kernel: m=4, n in 9..15, k stepping 1, 10, ..., 37,
// with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2373
// Cortex-A75 4x8 kernel: n in 9..15, k stepping 1, 10, ..., 37, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2393
// Cortex-A75 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 10, ..., 37.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2410
// Cortex-A75 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 10, ..., 37,
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2428
// Cortex-A75 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 10, ..., 37,
// with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2446
// Cortex-A75 4x8 kernel: n in {16, 24}, k stepping 1, 10, ..., 37, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2466
// Cortex-A75 4x8 kernel: k stepping 1, 10, ..., 37, n in 1..8, m in 1..4,
// with cm_stride set to 11 and one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2487
// Cortex-A75 4x8 kernel on a full 4x8 tile with k=8 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2501
// Cortex-A75 4x8 kernel on a full 4x8 tile with k=8 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2515
// Cortex-A75 4x8 kernel on a full 4x8 tile with k=8 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2529 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2530
2531
2532 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// LD64 4x8 kernel on a full 4x8 tile with k=2.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2545
// LD64 4x8 kernel on a full 4x8 tile with k=2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2559
// LD64 4x8 kernel on a full 4x8 tile with k=2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2573
// LD64 4x8 kernel: k=2, every (n, m) pair with n in 1..8 and m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2591
// LD64 4x8 kernel: k=2, n=8, with m swept over 1..4 (one iteration each).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2607
// LD64 4x8 kernel: k=2, m=4, with n swept over 1..8 (one iteration each).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2623
// LD64 4x8 kernel on a full 4x8 tile; the loop covers k values below 2 (only k=1).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 2; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2638
// LD64 4x8 kernel on a full 4x8 tile; k values below 2 (only k=1), a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 2; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2654
// LD64 4x8 kernel: k values below 2 (only k=1), n in 1..8, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 2; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2674
// LD64 4x8 kernel on a full 4x8 tile; the loop covers k from 3 up to (not including) 4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 3; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2689
// LD64 4x8 kernel on a full 4x8 tile; k from 3 up to (not including) 4,
// with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 3; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2705
// LD64 4x8 kernel: k from 3 up to (not including) 4, n in 1..8, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 3; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2725
// LD64 4x8 kernel on a full 4x8 tile, with k stepping 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2740
// LD64 4x8 kernel on a full 4x8 tile, k stepping 4, 6, ..., 20,
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
2756
// LD64 4x8 kernel: k stepping 4, 6, ..., 20, n in 1..8, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2776
// LD64 4x8 kernel: m=4, n in 9..15, k stepping 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2793
// LD64 4x8 kernel: m=4, n in 9..15, k stepping 1, 4, 7, 10,
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2811
// LD64 4x8 kernel: m=4, n in 9..15, k stepping 1, 4, 7, 10,
// with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2829
// LD64 4x8 kernel: n in 9..15, k stepping 1, 4, 7, 10, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2849
// LD64 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2866
// LD64 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 4, 7, 10,
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2884
// LD64 4x8 kernel: m=4, n in {16, 24}, k stepping 1, 4, 7, 10,
// with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2902
// LD64 4x8 kernel: n in {16, 24}, k stepping 1, 4, 7, 10, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2922
// LD64 4x8 kernel: k stepping 1, 4, 7, 10, n in 1..8, m in 1..4,
// with cm_stride set to 11 and one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2943
// LD64 4x8 kernel on a full 4x8 tile with k=2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2957
// LD64 4x8 kernel on a full 4x8 tile with k=2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2971
// LD64 4x8 kernel on a full 4x8 tile with k=2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
2985 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2986
2987
2988 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// LD128 4x8 kernel on a full 4x8 tile with k=4.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3001
// LD128 4x8 kernel on a full 4x8 tile with k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3015
// LD128 4x8 kernel on a full 4x8 tile with k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
}
3029
// LD128 4x8 kernel: k=4, every (n, m) pair with n in 1..8 and m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3047
// LD128 4x8 kernel: k=4, n=8, with m swept over 1..4 (one iteration each).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3063
// LD128 4x8 kernel: k=4, m=4, with n swept over 1..8 (one iteration each).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3079
// LD128 4x8 kernel on a full 4x8 tile, with k swept over 1..3.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3094
// LD128 4x8 kernel on a full 4x8 tile, k swept over 1..3, a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3110
// LD128 4x8 kernel: k in 1..3, n in 1..8, m in 1..4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3130
// LD128 4x8 kernel on a full 4x8 tile, with k swept over 5..7.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3145
// LD128 4x8 kernel on a full 4x8 tile, k swept over 5..7, a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3161
// LD128 4x8 kernel: k in 5..7, n in 1..8, m in 1..4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3181
// LD128 4x8 kernel on a full 4x8 tile, with k stepping 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3196
// LD128 4x8 kernel on a full 4x8 tile, k stepping 8, 12, ..., 40,
// with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
  }
}
3212
// LD128 4x8 kernel: k stepping 8, 12, ..., 40, n in 1..8, m in 1..4,
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3232
// LD128 4x8 kernel: m=4, n in 9..15, k stepping 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3249
// 4x8 ld128 kernel, m=4, n in 9..15, k in 1, 6, 11, 16, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3267
// 4x8 ld128 kernel, m=4, n in 9..15, k in 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3285
// 4x8 ld128 kernel: n in 9..15, k in 1, 6, 11, 16, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3305
// 4x8 ld128 kernel, m=4, n in 16, 24, k in 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3322
// 4x8 ld128 kernel, m=4, n in 16, 24, k in 1, 6, 11, 16, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3340
// 4x8 ld128 kernel, m=4, n in 16, 24, k in 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3358
// 4x8 ld128 kernel: n in 16, 24, k in 1, 6, 11, 16, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3378
// 4x8 ld128 kernel with cm_stride(11): k in 1, 6, 11, 16, n in 1..8,
// m in 1..4; a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3399
// 4x8 ld128 kernel, m=4, n=8, k=4, with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
}
3413
// 4x8 ld128 kernel, m=4, n=8, k=4, with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
}
3427
// 4x8 ld128 kernel, m=4, n=8, k=4, with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
}
3441 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3442
3443
3444 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3457
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8, with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3471
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8, with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3485
// 4x8 prfm_cortex_a75 kernel, k=8: every (m, n) with n in 1..8 and m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3503
// 4x8 prfm_cortex_a75 kernel, n=8, k=8, m in 1..4; a single iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3519
// 4x8 prfm_cortex_a75 kernel, m=4, k=8, n in 1..8; a single iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3535
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=16.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(16)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3548
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=16, with a_stride(19).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(16)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3562
// 4x8 prfm_cortex_a75 kernel, k=16: every (m, n) with n in 1..8 and m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3580
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 1..15.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3595
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 1..15, with a_stride(19).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3611
// 4x8 prfm_cortex_a75 kernel: k in 1..15, n in 1..8, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3631
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 17..31.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3646
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 17..31, with a_stride(37).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(37)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3662
// 4x8 prfm_cortex_a75 kernel: k in 17..31, n in 1..8, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3682
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 24, 32, ..., 80.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3697
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k in 24, 32, ..., 80, with a_stride(83).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .a_stride(83)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3713
// 4x8 prfm_cortex_a75 kernel: k in 24, 32, ..., 80, n in 1..8, m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3733
// 4x8 prfm_cortex_a75 kernel, m=4, n in 9..15, k in 1, 10, 19, 28, 37.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3750
// 4x8 prfm_cortex_a75 kernel, m=4, n in 9..15, k in 1, 10, 19, 28, 37,
// with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3768
// 4x8 prfm_cortex_a75 kernel, m=4, n in 9..15, k in 1, 10, 19, 28, 37,
// with a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3786
// 4x8 prfm_cortex_a75 kernel: n in 9..15, k in 1, 10, 19, 28, 37, m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3806
// 4x8 prfm_cortex_a75 kernel, m=4, n in 16, 24, k in 1, 10, 19, 28, 37.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3823
// 4x8 prfm_cortex_a75 kernel, m=4, n in 16, 24, k in 1, 10, 19, 28, 37,
// with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3841
// 4x8 prfm_cortex_a75 kernel, m=4, n in 16, 24, k in 1, 10, 19, 28, 37,
// with a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3859
// 4x8 prfm_cortex_a75 kernel: n in 16, 24, k in 1, 10, 19, 28, 37, m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3879
// 4x8 prfm_cortex_a75 kernel with cm_stride(11): k in 1, 10, 19, 28, 37,
// n in 1..8, m in 1..4; a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3900
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8, with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3914
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8, with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3928
// 4x8 prfm_cortex_a75 kernel, m=4, n=8, k=8, with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3942 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3943
3944
3945 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x12 cortex_a53 kernel, m=4, n=12, k=4.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
3958
// 4x12 cortex_a53 kernel, m=4, n=12, k=4, with cn_stride(17).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(4)
      .cn_stride(17)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
3972
// 4x12 cortex_a53 kernel, m=4, n=12, k=4, with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
3986
// 4x12 cortex_a53 kernel, k=4: every (m, n) with n in 1..12 and m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4004
// 4x12 cortex_a53 kernel, n=12, k=4, m in 1..4; a single iteration each.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(m_val).n(12).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4020
// 4x12 cortex_a53 kernel, m=4, k=4, n in 1..12; a single iteration each.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4036
// 4x12 cortex_a53 kernel, m=4, n=12, k=8.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(8)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4049
// 4x12 cortex_a53 kernel, m=4, n=12, k=8, with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(12).kr(1).sr(1)
      .m(4).n(12).k(8)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4063
// 4x12 cortex_a53 kernel, k=8: every (m, n) with n in 1..12 and m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4081
// 4x12 cortex_a53 kernel, m=4, n=12, k in 1..7.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4096
// 4x12 cortex_a53 kernel, m=4, n=12, k in 1..7, with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4112
// 4x12 cortex_a53 kernel: k in 1..7, n in 1..12, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4132
// 4x12 cortex_a53 kernel, m=4, n=12, k in 9..15.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4147
// 4x12 cortex_a53 kernel, m=4, n=12, k in 9..15, with a_stride(19).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .a_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4163
// 4x12 cortex_a53 kernel: k in 9..15, n in 1..12, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4183
// 4x12 cortex_a53 kernel, m=4, n=12, k in 12, 16, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4198
// 4x12 cortex_a53 kernel, m=4, n=12, k in 12, 16, ..., 40, with a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(12).k(k_val)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4214
// 4x12 cortex_a53 kernel: k in 12, 16, ..., 40, n in 1..12, m in 1..4;
// a single iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 12; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4234
// 4x12 cortex_a53 kernel, m=4, n in 13..23, k in 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4251
// 4x12 cortex_a53 kernel, m=4, n in 13..23, k in 1, 6, 11, 16,
// with cn_stride(17).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(17)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4269
// 4x12 cortex_a53 kernel, m=4, n in 13..23, k in 1, 6, 11, 16,
// with a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4287
// 4x12 cortex_a53 kernel: n in 13..23, k in 1, 6, 11, 16, m in 1..4; a single
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 13; n_val < 24; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(12).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4307
// 4x12 Cortex-A53 kernel: n in {24, 36}, k in {1, 6, 11, 16}, m = 4.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 24; n_dim <= 36; n_dim += 12) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4324
// 4x12 Cortex-A53 kernel: n in {24, 36}, k in {1, 6, 11, 16}, m = 4, cn_stride = 17.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 24; n_dim <= 36; n_dim += 12) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(17)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4342
// 4x12 Cortex-A53 kernel: n in {24, 36}, k in {1, 6, 11, 16}, m = 4, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 24; n_dim <= 36; n_dim += 12) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(12).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4360
// 4x12 Cortex-A53 kernel: n in {24, 36}, k in {1, 6, 11, 16}, m in [1, 4]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 24; n_dim <= 36; n_dim += 12) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4380
// 4x12 Cortex-A53 kernel: k in {1, 6, 11, 16}, n in [1, 12], m in [1, 4], cm_stride = 17; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 12; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(12).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(17)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4401
// 4x12 Cortex-A53 kernel: single 4x12 case with k = 4 and qmin = 128.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
4415
// 4x12 Cortex-A53 kernel: single 4x12 case with k = 4 and qmax = 128.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
4429
// 4x12 Cortex-A53 kernel: single 4x12 case with k = 4 and cm_stride = 17.
TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(12).kr(1).sr(1)
    .m(4).n(12).k(4)
    .cm_stride(17)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
4443 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4444
4445
4446 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4459
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4 and cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4473
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4 and a_stride = 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4487
// 6x8 Cortex-A55 kernel: k = 4, n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4505
// 6x8 Cortex-A55 kernel: k = 4, n = 8, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4521
// 6x8 Cortex-A55 kernel: k = 4, m = 6, n in [1, 8]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4537
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4550
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 8 and a_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .a_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4564
// 6x8 Cortex-A55 kernel: k = 8, n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4582
// 6x8 Cortex-A55 kernel: k in [1, 7], m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4597
// 6x8 Cortex-A55 kernel: k in [1, 7], m = 6, n = 8, a_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4613
// 6x8 Cortex-A55 kernel: k in [1, 7], n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4633
// 6x8 Cortex-A55 kernel: k in [9, 15], m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4648
// 6x8 Cortex-A55 kernel: k in [9, 15], m = 6, n = 8, a_stride = 19.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4664
// 6x8 Cortex-A55 kernel: k in [9, 15], n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4684
// 6x8 Cortex-A55 kernel: k = 12, 16, ..., 40 (step 4), m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4699
// 6x8 Cortex-A55 kernel: k = 12, 16, ..., 40 (step 4), m = 6, n = 8, a_stride = 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
  }
}
4715
// 6x8 Cortex-A55 kernel: k = 12, 16, ..., 40 (step 4), n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 12; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4735
// 6x8 Cortex-A55 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4752
// 6x8 Cortex-A55 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4770
// 6x8 Cortex-A55 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4788
// 6x8 Cortex-A55 kernel: n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4808
// 6x8 Cortex-A55 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4825
// 6x8 Cortex-A55 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4843
// 6x8 Cortex-A55 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
4861
// 6x8 Cortex-A55 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4881
// 6x8 Cortex-A55 kernel: k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6], cm_stride = 11; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4902
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4 and qmin = 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4916
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4 and qmax = 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4930
// 6x8 Cortex-A55 kernel: single 6x8 case with k = 4 and cm_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
          xnn_init_f32_minmax_scalar_params);
}
4944 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4945
4946
4947 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 6x8 Cortex-A75 kernel: single 6x8 case with k = 4.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
4960
// 6x8 Cortex-A75 kernel: single 6x8 case with k = 4 and cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
4974
// 6x8 Cortex-A75 kernel: single 6x8 case with k = 4 and a_stride = 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
4988
// 6x8 Cortex-A75 kernel: k = 4, n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5006
// 6x8 Cortex-A75 kernel: k = 4, n = 8, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5022
// 6x8 Cortex-A75 kernel: k = 4, m = 6, n in [1, 8]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5038
// 6x8 Cortex-A75 kernel: k in [1, 3], m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5053
// 6x8 Cortex-A75 kernel: k in [1, 3], m = 6, n = 8, a_stride = 7.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5069
// 6x8 Cortex-A75 kernel: k in [1, 3], n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5089
// 6x8 Cortex-A75 kernel: k in [5, 7], m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5104
// 6x8 Cortex-A75 kernel: k in [5, 7], m = 6, n = 8, a_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5120
// 6x8 Cortex-A75 kernel: k in [5, 7], n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5140
// 6x8 Cortex-A75 kernel: k = 8, 12, ..., 40 (step 4), m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5155
// 6x8 Cortex-A75 kernel: k = 8, 12, ..., 40 (step 4), m = 6, n = 8, a_stride = 43.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
5171
// 6x8 Cortex-A75 kernel: k = 8, 12, ..., 40 (step 4), n in [1, 8], m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5191
// 6x8 Cortex-A75 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5208
// 6x8 Cortex-A75 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5226
// 6x8 Cortex-A75 kernel: n in [9, 15], k in {1, 6, 11, 16}, m = 6, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5244
// 6x8 Cortex-A75 kernel: n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5264
// 6x8 Cortex-A75 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5281
// 6x8 Cortex-A75 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6, cn_stride = 11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5299
// 6x8 Cortex-A75 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m = 6, a_stride = 23.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
5317
// 6x8 Cortex-A75 kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5337
// 6x8 Cortex-A75 kernel: k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6], cm_stride = 11; one iteration per case.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5358
// 6x8 Cortex-A75 kernel: single 6x8 case with k = 4 and qmin = 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
5372
// Single configuration: m=6, n=8, k=4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5386
// Single configuration: m=6, n=8, k=4 with cm_stride=11.
TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5400 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5401
5402
5403 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m=1, n=8, k=2.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5416
// Single configuration: m=1, n=8, k=2 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5430
// Single configuration: m=1, n=8, k=2 with a_stride=5.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .a_stride(5);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5444
// Sweeps n over 1..8 and m over 1..1 at k=2, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(2)
          .iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5462
// Sweeps m over 1..1 at n=8, k=2, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(2)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5478
// Sweeps n over 1..8 at m=1, k=2, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(2)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5494
// Sweeps k over [1, 2) (i.e. k=1 only) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5509
// Sweeps k over [1, 2) (i.e. k=1 only) at m=1, n=8 with a_stride=5.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(5);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5525
// Sweeps k over [1, 2), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5545
// Sweeps k over [3, 4) (i.e. k=3 only) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5560
// Sweeps k over [3, 4) (i.e. k=3 only) at m=1, n=8 with a_stride=7.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5576
// Sweeps k over [3, 4), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5596
// Sweeps even k from 4 to 20 (step 2) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5611
// Sweeps even k from 4 to 20 (step 2) at m=1, n=8 with a_stride=23.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(23);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5627
// Sweeps even k from 4 to 20, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5647
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5664
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5682
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1 with a_stride=13.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5700
// Sweeps n over 9..15, k over {1, 4, 7, 10} and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5720
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5737
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5755
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1 with a_stride=13.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5773
// Sweeps n over {16, 24}, k over {1, 4, 7, 10} and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5793
// Sweeps k over {1, 4, 7, 10}, n over 1..8 and m over 1..1 with cm_stride=11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5814
// Single configuration: m=1, n=8, k=2 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5828
// Single configuration: m=1, n=8, k=2 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5842
// Single configuration: m=1, n=8, k=2 with cm_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
5856 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5857
5858
5859 #if XNN_ARCH_ARM64
// Single configuration: m=1, n=8, k=2.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
5872
// Single configuration: m=1, n=8, k=2 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
5886
// Single configuration: m=1, n=8, k=2 with a_stride=5.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .a_stride(5);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
5900
// Sweeps n over 1..8 and m over 1..1 at k=2, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(2)
          .iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5918
// Sweeps m over 1..1 at n=8, k=2, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(2)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5934
// Sweeps n over 1..8 at m=1, k=2, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(2)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5950
// Sweeps k over [1, 2) (i.e. k=1 only) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5965
// Sweeps k over [1, 2) (i.e. k=1 only) at m=1, n=8 with a_stride=5.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(5);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
5981
// Sweeps k over [1, 2), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6001
// Sweeps k over [3, 4) (i.e. k=3 only) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6016
// Sweeps k over [3, 4) (i.e. k=3 only) at m=1, n=8 with a_stride=7.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6032
// Sweeps k over [3, 4), n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6052
// Sweeps even k from 4 to 20 (step 2) at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6067
// Sweeps even k from 4 to 20 (step 2) at m=1, n=8 with a_stride=23.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .a_stride(23);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6083
// Sweeps even k from 4 to 20, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6103
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6120
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6138
// Sweeps n over 9..15 and k over {1, 4, 7, 10} at m=1 with a_stride=13.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6156
// Sweeps n over 9..15, k over {1, 4, 7, 10} and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6176
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6193
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6211
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} at m=1 with a_stride=13.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(13);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6229
// Sweeps n over {16, 24}, k over {1, 4, 7, 10} and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6249
// Sweeps k over {1, 4, 7, 10}, n over 1..8 and m over 1..1 with cm_stride=11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6270
// Single configuration: m=1, n=8, k=2 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6284
// Single configuration: m=1, n=8, k=2 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6298
// Single configuration: m=1, n=8, k=2 with cm_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(2)
      .cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
6312 #endif // XNN_ARCH_ARM64
6313
6314
6315 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m=1, n=8, k=4 (sr=4).
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6328
// Single configuration: m=1, n=8, k=4 (sr=4) with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6342
// Single configuration: m=1, n=8, k=4 (sr=4) with a_stride=7.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6356
// Sweeps n over 1..8 and m over 1..1 at k=4, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6374
// Sweeps m over 1..1 at n=8, k=4, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6390
// Sweeps n over 1..8 at m=1, k=4, with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(n_dim).k(4)
        .iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6406
// Sweeps k over 1..3 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6421
// Sweeps k over 1..3 at m=1, n=8 with a_stride=7.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6437
// Sweeps k over 1..3, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6457
// Sweeps k over 5..7 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6472
// Sweeps k over 5..7 at m=1, n=8 with a_stride=11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6488
// Sweeps k over 5..7, n over 1..8 and m over 1..1, with iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6508
// Sweeps k over multiples of 4 from 8 to 40 at m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6523
// Sweeps k over multiples of 4 from 8 to 40 at m=1, n=8 with a_stride=43.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(k_dim)
        .a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
6539
// For k = 8, 12, ..., 40: tries every n in [1, 8] and m in [1, 1], one iteration each.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6559
// n runs over 9..15 (above nr = 8); for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6576
// n in 9..15 and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6594
// n in 9..15 and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6612
// n in 9..15, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6632
// n takes 16 and 24 (multiples of nr = 8); for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6649
// n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6667
// n in {16, 24} and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6685
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 1]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6705
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 1], with cm_stride set to 11;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6726
// Single run at m = 1, n = 8, k = 4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6740
// Single run at m = 1, n = 8, k = 4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6754
// Single run at m = 1, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
6768 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6769
6770
6771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 4x8 NEON-FMA dup-ld64 GEMMINC kernel at m = 4, n = 8, k = 2.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6784
// Single run at m = 4, n = 8, k = 2 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6798
// Single run at m = 4, n = 8, k = 2 with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
6812
// At k = 2, tries every n in [1, 8] combined with every m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6830
// At k = 2 and n = 8, tries every m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6846
// At k = 2 and m = 4, tries every n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6862
// Covers k below 2 (the loop visits only k = 1) with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6877
// k below 2 (only k = 1) with m = 4, n = 8 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6893
// k below 2 (only k = 1): tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6913
// Covers k in [3, 4) (the loop visits only k = 3) with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6928
// k in [3, 4) (only k = 3) with m = 4, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6944
// k in [3, 4) (only k = 3): tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6964
// Sweeps k over the even values from 4 through 20 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6979
// Even k from 4 through 20 with m = 4, n = 8 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
6995
// Even k from 4 through 20: tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7015
// n runs over 9..15 (above nr = 8); for each n, k takes 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7032
// n in 9..15 and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7050
// n in 9..15 and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7068
// n in 9..15, k in {1, 4, 7, 10}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7088
// n takes 16 and 24 (multiples of nr = 8); for each n, k takes 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7105
// n in {16, 24} and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7123
// n in {16, 24} and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7141
// n in {16, 24}, k in {1, 4, 7, 10}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7161
// k in {1, 4, 7, 10}, n in [1, 8], m in [1, 4], with cm_stride set to 11;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7182
// Single run at m = 4, n = 8, k = 2 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
7196
// Single run at m = 4, n = 8, k = 2 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
7210
// Single run at m = 4, n = 8, k = 2 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
7224 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7225
7226
7227 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 4x8 NEON-FMA dup-ld128 GEMMINC kernel at m = 4, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7240
// Single run at m = 4, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7254
// Single run at m = 4, n = 8, k = 4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7268
// At k = 4, tries every n in [1, 8] combined with every m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7286
// At k = 4 and n = 8, tries every m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7302
// At k = 4 and m = 4, tries every n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7318
// Covers k = 1, 2, 3 (below 4) with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7333
// k = 1, 2, 3 with m = 4, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7349
// For k = 1, 2, 3: tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7369
// Covers k = 5, 6, 7 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7384
// k = 5, 6, 7 with m = 4, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7400
// For k = 5, 6, 7: tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7420
// Sweeps k over the multiples of 4 from 8 through 40 with m = 4 and n = 8.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7435
// Multiples of 4 from 8 through 40 for k, with m = 4, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
7451
// For k = 8, 12, ..., 40: tries every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7471
// n runs over 9..15 (above nr = 8); for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7488
// n in 9..15 and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7506
// n in 9..15 and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7524
// n in 9..15, k in {1, 6, 11, 16}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7544
// n takes 16 and 24 (multiples of nr = 8); for each n, k takes 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7561
// n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7579
// n in {16, 24} and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
7597
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 4]; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7617
// k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4], with cm_stride set to 11;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7638
// Single run at m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7652
// Single run at m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7666
// Single run at m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
7680 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7681
7682
7683 #if XNN_ARCH_ARM64
// Single m=4, n=8 run at k = 2.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
7696
// Single m=4, n=8, k=2 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
7710
// Single m=4, n=8, k=2 run with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
7724
// Sweeps n over 1..8 and m over 1..4 at k = 2, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
7742
// Sweeps m over 1..4 with n = 8 and k = 2, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7758
// Sweeps n over 1..8 with m = 4 and k = 2, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_dim).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7774
// Runs m=4, n=8 for every k in [1, 2), i.e. only k = 1.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7789
// Runs m=4, n=8 for every k in [1, 2) with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7805
// For every k in [1, 2), sweeps n over 1..8 and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7825
// Runs m=4, n=8 for every k in [3, 4), i.e. only k = 3.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7840
// Runs m=4, n=8 for every k in [3, 4) with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7856
// For every k in [3, 4), sweeps n over 1..8 and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7876
// Runs m=4, n=8 for even k from 4 through 20.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7891
// Runs m=4, n=8 for even k from 4 through 20 with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_dim)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
7907
// For even k from 4 through 20, sweeps n over 1..8 and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7927
// Sweeps n over 9..15 and k over {1, 4, 7, 10} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
7944
// Sweeps n over 9..15 and k over {1, 4, 7, 10} with m = 4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
7962
// Sweeps n over 9..15 and k over {1, 4, 7, 10} with m = 4 and a_stride(13).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
7980
// Sweeps n over 9..15, k over {1, 4, 7, 10} and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8000
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8017
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} with m = 4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8035
// Sweeps n over {16, 24} and k over {1, 4, 7, 10} with m = 4 and a_stride(13).
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8053
// Sweeps n over {16, 24}, k over {1, 4, 7, 10} and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8073
// Sweeps k over {1, 4, 7, 10}, n over 1..8 and m over 1..4 with cm_stride(11),
// running a single iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8094
// Single m=4, n=8, k=2 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
8108
// Single m=4, n=8, k=2 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
8122
// Single m=4, n=8, k=2 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64,
          xnn_init_f32_minmax_scalar_params);
}
8136 #endif // XNN_ARCH_ARM64
8137
8138
8139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=4, n=8 run at k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8152
// Single m=4, n=8, k=4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8166
// Single m=4, n=8, k=4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8180
// Sweeps n over 1..8 and m over 1..4 at k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8198
// Sweeps m over 1..4 with n = 8 and k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8214
// Sweeps n over 1..8 with m = 4 and k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8230
// Runs m=4, n=8 for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8245
// Runs m=4, n=8 for k = 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8261
// For k = 1, 2, 3, sweeps n over 1..8 and m over 1..4 with one iteration
// per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8281
// Runs m=4, n=8 for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8296
// Runs m=4, n=8 for k = 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8312
// For k = 5, 6, 7, sweeps n over 1..8 and m over 1..4 with one iteration
// per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8332
// Runs m=4, n=8 for k from 8 through 40 in steps of 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8347
// Runs m=4, n=8 for k from 8 through 40 in steps of 4 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
            xnn_init_f32_minmax_scalar_params);
  }
}
8363
// For k from 8 through 40 in steps of 4, sweeps n over 1..8 and m over 1..4
// with one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8383
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8400
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = 4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8418
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = 4 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8436
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8456
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8473
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m = 4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8491
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m = 4 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8509
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..4 with one
// iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8529
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..4 with cm_stride(11),
// running a single iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8550
// Single m=4, n=8, k=4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8564
// Single m=4, n=8, k=4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8578
// Single m=4, n=8, k=4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon,
          xnn_init_f32_minmax_scalar_params);
}
8592 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8593
8594
8595 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m=4, n=8 run at k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
8608
// Single m=4, n=8, k=4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
8622
// Single m=4, n=8, k=4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
          xnn_init_f32_minmax_scalar_params);
}
8636
// Sweeps n over 1..8 and m over 1..4 at k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8654
// Sweeps m over 1..4 with n = 8 and k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8670
// Sweeps n over 1..8 with m = 4 and k = 4, one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8686
// Runs m=4, n=8 for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8701
// Runs m=4, n=8 for k = 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8717
// For k = 1, 2, 3, sweeps n over 1..8 and m over 1..4 with one iteration
// per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8737
// Runs m=4, n=8 for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8752
// Runs m=4, n=8 for k = 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8768
// For k = 5, 6, 7, sweeps n over 1..8 and m over 1..4 with one iteration
// per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8788
// Runs m=4, n=8 for k from 8 through 40 in steps of 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8803
// Runs m=4, n=8 for k from 8 through 40 in steps of 4 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
            xnn_init_f32_minmax_scalar_params);
  }
}
8819
// For k from 8 through 40 in steps of 4, sweeps n over 1..8 and m over 1..4
// with one iteration per shape.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8839
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8856
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m = 4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
8874
// Runs m=4 with n in 9..15 and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n).k(k)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8892
// For n in 9..15 and k in {1, 6, 11, 16}, runs every m in 1..4 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8912
// Runs m=4 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8929
// Runs m=4 with n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8947
// Runs m=4 with n in {16, 24} and k in {1, 6, 11, 16}, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n).k(k)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8965
// For n in {16, 24} and k in {1, 6, 11, 16}, runs every m in 1..4 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8985
// For k in {1, 6, 11, 16}, runs every (n, m) with n in 1..8 and m in 1..4,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(4)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9006
// Single run with m=4, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9020
// Single run with m=4, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9034
// Single run with m=4, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9048 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9049
9050
9051 #if XNN_ARCH_ARM64
// Single run of the 5x8 NEON FMA lane-ld64 ukernel with m=5, n=8, k=2.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9064
// Single run with m=5, n=8, k=2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9078
// Single run with m=5, n=8, k=2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9092
// With k=2, runs every (n, m) with n in 1..8 and m in 1..5, using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 5; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(m).n(n).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9110
// With n=8 and k=2, runs every m in 1..5, using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 5; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(m).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9126
// With m=5 and k=2, runs every n in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9142
// Runs m=5, n=8 for every k below 2 (the loop yields only k=1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9157
// Runs m=5, n=8 for every k below 2 (only k=1), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9173
// For every k below 2 (only k=1), runs every (n, m) with n in 1..8 and
// m in 1..5, using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9193
// Runs m=5, n=8 for k in [3, 4) (the loop yields only k=3).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9208
// Runs m=5, n=8 for k in [3, 4) (only k=3), with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9224
// For k in [3, 4) (only k=3), runs every (n, m) with n in 1..8 and m in 1..5,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9244
// Runs m=5, n=8 for k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9259
// Runs m=5, n=8 for k = 4, 6, ..., 20, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9275
// For k = 4, 6, ..., 20, runs every (n, m) with n in 1..8 and m in 1..5,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9295
// Runs m=5 with n in 9..15 and k in {1, 4, 7, 10}.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9312
// Runs m=5 with n in 9..15 and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9330
// Runs m=5 with n in 9..15 and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9348
// For n in 9..15 and k in {1, 4, 7, 10}, runs every m in 1..5 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9368
// Runs m=5 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9385
// Runs m=5 with n in {16, 24} and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9403
// Runs m=5 with n in {16, 24} and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n).k(k)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9421
// For n in {16, 24} and k in {1, 4, 7, 10}, runs every m in 1..5 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9441
// For k in {1, 4, 7, 10}, runs every (n, m) with n in 1..8 and m in 1..5,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9462
// Single run with m=5, n=8, k=2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9476
// Single run with m=5, n=8, k=2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9490
// Single run with m=5, n=8, k=2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9504 #endif // XNN_ARCH_ARM64
9505
9506
9507 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 6x8 NEON dup-ld64 ukernel with m=6, n=8, k=2.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9520
// Single run with m=6, n=8, k=2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9534
// Single run with m=6, n=8, k=2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9548
// With k=2, runs every (n, m) with n in 1..8 and m in 1..6, using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9566
// With n=8 and k=2, runs every m in 1..6, using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 6; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(m).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9582
// With m=6 and k=2, runs every n in 1..8, using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9598
// Runs m=6, n=8 for every k below 2 (the loop yields only k=1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9613
// Runs m=6, n=8 for every k below 2 (only k=1), with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9629
// For every k below 2 (only k=1), runs every (n, m) with n in 1..8 and
// m in 1..6, using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9649
// Runs m=6, n=8 for k in [3, 4) (the loop yields only k=3).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9664
// Runs m=6, n=8 for k in [3, 4) (only k=3), with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9680
// For k in [3, 4) (only k=3), runs every (n, m) with n in 1..8 and m in 1..6,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9700
// Runs m=6, n=8 for k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9715
// Runs m=6, n=8 for k = 4, 6, ..., 20, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9731
// For k = 4, 6, ..., 20, runs every (n, m) with n in 1..8 and m in 1..6,
// using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9751
// Runs m=6 with n in 9..15 and k in {1, 4, 7, 10}.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9768
// Runs m=6 with n in 9..15 and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9786
// Runs m=6 with n in 9..15 and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9804
// For n in 9..15 and k in {1, 4, 7, 10}, runs every m in 1..6 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9824
// Runs m=6 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9841
// Runs m=6 with n in {16, 24} and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9859
// Runs m=6 with n in {16, 24} and k in {1, 4, 7, 10}, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9877
// For n in {16, 24} and k in {1, 4, 7, 10}, runs every m in 1..6 with
// iterations(1) per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9897
// For k in {1, 4, 7, 10}, runs every (n, m) with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9918
// Single run with m=6, n=8, k=2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9932
// Single run with m=6, n=8, k=2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9946
// Single run with m=6, n=8, k=2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9960 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9961
9962
9963 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 6x8 NEON dup-ld128 ukernel with m=6, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
9976
// Single run with m=6, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
9990
// Single run with m=6, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10004
// With k=4, runs every (n, m) with n in 1..8 and m in 1..6, using iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10022
// Sweeps m over 1..6 at n=8, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10038
// Sweeps n over 1..8 at m=6, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10054
// Sweeps k over 1..3 at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10069
// Sweeps k over 1..3 at m=6, n=8 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10085
// For k in 1..3, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10105
// Sweeps k over 5..7 at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10120
// Sweeps k over 5..7 at m=6, n=8 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10136
// For k in 5..7, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10156
// Sweeps k over 8, 12, ..., 40 (step 4) at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10171
// Sweeps k over 8, 12, ..., 40 (step 4) at m=6, n=8 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10187
// For k in 8, 12, ..., 40, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10207
// For n in 9..15, sweeps k over 1, 6, 11, 16 (step 5) at m=6.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10224
// For n in 9..15, sweeps k over 1, 6, 11, 16 (step 5) at m=6 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10242
// For n in 9..15, sweeps k over 1, 6, 11, 16 (step 5) at m=6 with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10260
// For n in 9..15 and k in 1, 6, 11, 16, sweeps m over 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10280
// For n in 16 and 24 (step 8), sweeps k over 1, 6, 11, 16 (step 5) at m=6.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10297
// For n in 16 and 24 (step 8), sweeps k over 1, 6, 11, 16 at m=6 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10315
// For n in 16 and 24 (step 8), sweeps k over 1, 6, 11, 16 at m=6 with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10333
// For n in 16 and 24 and k in 1, 6, 11, 16, sweeps m over 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10353
// For k in 1, 6, 11, 16, sweeps every (n, m) pair with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10374
// Single run at m=6, n=8, k=4 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10388
// Single run at m=6, n=8, k=4 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10402
// Single run at m=6, n=8, k=4 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10416 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10417
10418
10419 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=6, n=8, k=2.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10432
// Single run at m=6, n=8, k=2 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10446
// Single run at m=6, n=8, k=2 with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .a_stride(5)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10460
// Sweeps every (n, m) pair with n in 1..8 and m in 1..6 at k=2, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10478
// Sweeps m over 1..6 at n=8, k=2, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10494
// Sweeps n over 1..8 at m=6, k=2, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(2)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10510
// Loop bounds admit only k=1; runs at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10525
// Loop bounds admit only k=1; runs at m=6, n=8 with a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10541
// For k=1 (the only value the loop admits), sweeps every (n, m) pair with n in 1..8
// and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10561
// Loop bounds admit only k=3; runs at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10576
// Loop bounds admit only k=3; runs at m=6, n=8 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10592
// For k=3 (the only value the loop admits), sweeps every (n, m) pair with n in 1..8
// and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10612
// Sweeps k over 4, 6, ..., 20 (step 2) at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10627
// Sweeps k over 4, 6, ..., 20 (step 2) at m=6, n=8 with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(23)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10643
// For k in 4, 6, ..., 20, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10663
// For n in 9..15, sweeps k over 1, 4, 7, 10 (step 3) at m=6.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10680
// For n in 9..15, sweeps k over 1, 4, 7, 10 (step 3) at m=6 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10698
// For n in 9..15, sweeps k over 1, 4, 7, 10 (step 3) at m=6 with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10716
// For n in 9..15 and k in 1, 4, 7, 10, sweeps m over 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10736
// For n in 16 and 24 (step 8), sweeps k over 1, 4, 7, 10 (step 3) at m=6.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10753
// For n in 16 and 24 (step 8), sweeps k over 1, 4, 7, 10 at m=6 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10771
// For n in 16 and 24 (step 8), sweeps k over 1, 4, 7, 10 at m=6 with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(13)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10789
// For n in 16 and 24 and k in 1, 4, 7, 10, sweeps m over 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10809
// For k in 1, 4, 7, 10, sweeps every (n, m) pair with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10830
// Single run at m=6, n=8, k=2 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10844
// Single run at m=6, n=8, k=2 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10858
// Single run at m=6, n=8, k=2 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10872 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10873
10874
10875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=6, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10888
// Single run at m=6, n=8, k=4 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10902
// Single run at m=6, n=8, k=4 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
10916
// Sweeps every (n, m) pair with n in 1..8 and m in 1..6 at k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10934
// Sweeps m over 1..6 at n=8, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10950
// Sweeps n over 1..8 at m=6, k=4, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10966
// Sweeps k over 1..3 at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10981
// Sweeps k over 1..3 at m=6, n=8 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
10997
// For k in 1..3, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11017
// Sweeps k over 5..7 at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11032
// Sweeps k over 5..7 at m=6, n=8 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11048
// For k in 5..7, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11068
// Sweeps k over 8, 12, ..., 40 (step 4) at m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11083
// Sweeps k over 8, 12, ..., 40 (step 4) at m=6, n=8 with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11099
// For k in 8, 12, ..., 40, sweeps every (n, m) pair with n in 1..8 and m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11119
// For n in 9..15, sweeps k over 1, 6, 11, 16 (step 5) at m=6.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11136
// Tests n = 9..15 with m = 6 and k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11154
// Tests n = 9..15 with m = 6 and k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11172
// Tests n = 9..15, k = 1, 6, 11, 16 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11192
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11209
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11227
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11245
// Tests n = 16 and n = 24, k = 1, 6, 11, 16 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11265
// For k = 1, 6, 11, 16, tests every (n, m) with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11286
// Runs the kernel test with m = 6, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
11300
// Runs the kernel test with m = 6, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
11314
// Runs the kernel test with m = 6, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
11328 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11329
11330
11331 #if XNN_ARCH_ARM64
// Runs the kernel test with m = 6, n = 8, k = 2.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11344
// Runs the kernel test with m = 6, n = 8, k = 2 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11358
// Runs the kernel test with m = 6, n = 8, k = 2 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .a_stride(5)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11372
// With k = 2, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(2)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11390
// With n = 8 and k = 2, tests every m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11406
// With m = 6 and k = 2, tests every n in 1..8, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(2)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11422
// Tests every k below 2 (i.e. only k = 1) with m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 1; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11437
// Tests every k below 2 (i.e. only k = 1) with m = 6, n = 8 and a_stride set to 5.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 1; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(5)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11453
// For every k below 2 (i.e. only k = 1), tests every (n, m) with n in 1..8
// and m in 1..6, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 1; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11473
// Tests k from 3 up to (but excluding) 4, i.e. only k = 3, with m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11488
// Tests only k = 3 (loop range [3, 4)) with m = 6, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11504
// For k = 3 (loop range [3, 4)), tests every (n, m) with n in 1..8 and
// m in 1..6, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 3; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11524
// Runs the kernel test with m = 6, n = 8 for k = 4, 6, ..., 20.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11539
// Same k sweep as k_div_2 (4, 6, ..., 20), with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11555
// For k = 4, 6, ..., 20, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 4; k_size <= 20; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11575
// Tests n = 9..15 with m = 6 and k = 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11592
// Tests n = 9..15 with m = 6 and k = 1, 4, 7, 10, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11610
// Tests n = 9..15 with m = 6 and k = 1, 4, 7, 10, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11628
// Tests n = 9..15, k = 1, 4, 7, 10 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11648
// Tests n = 16 and n = 24 with m = 6 and k = 1, 4, 7, 10.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11665
// Tests n = 16 and n = 24 with m = 6 and k = 1, 4, 7, 10, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11683
// Tests n = 16 and n = 24 with m = 6 and k = 1, 4, 7, 10, with a_stride set to 13.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(13)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11701
// Tests n = 16 and n = 24, k = 1, 4, 7, 10 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 10; k_size += 3) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11721
// For k = 1, 4, 7, 10, tests every (n, m) with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 10; k_size += 3) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11742
// Runs the kernel test with m = 6, n = 8, k = 2 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11756
// Runs the kernel test with m = 6, n = 8, k = 2 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11770
// Runs the kernel test with m = 6, n = 8, k = 2 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11784 #endif // XNN_ARCH_ARM64
11785
11786
11787 #if XNN_ARCH_ARM64
// Runs the kernel test with m = 6, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11800
// Runs the kernel test with m = 6, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11814
// Runs the kernel test with m = 6, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
11828
// With k = 4, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11846
// With n = 8 and k = 4, tests every m in 1..6, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11862
// With m = 6 and k = 4, tests every n in 1..8, one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11878
// Tests k = 1, 2, 3 with m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11893
// Tests k = 1, 2, 3 with m = 6, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11909
// For k = 1, 2, 3, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11929
// Tests k = 5, 6, 7 with m = 6, n = 8.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11944
// Tests k = 5, 6, 7 with m = 6, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11960
// For k = 5, 6, 7, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11980
// Runs the kernel test with m = 6, n = 8 for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11995
// Same k sweep as k_div_4 (8, 12, ..., 40), with a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12011
// For k = 8, 12, ..., 40, tests every (n, m) with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12031
// Tests n = 9..15 with m = 6 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12048
// Tests n = 9..15 with m = 6 and k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12066
// Tests n = 9..15 with m = 6 and k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12084
// Tests n = 9..15, k = 1, 6, 11, 16 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12104
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12121
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12139
// Tests n = 16 and n = 24 with m = 6 and k = 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12157
// Tests n = 16 and n = 24, k = 1, 6, 11, 16 and every m in 1..6,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12177
// For k = 1, 6, 11, 16, tests every (n, m) with n in 1..8 and m in 1..6,
// with cm_stride set to 11 and one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12198
// Runs the kernel test with m = 6, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
12212
// Runs the kernel test with m = 6, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
12226
// Runs the kernel test with m = 6, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
12240 #endif // XNN_ARCH_ARM64
12241
12242
12243 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12256
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12270
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4 with a_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12284
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 1..8 and m over 1..1 at k=4, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12302
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  // Sweep m over 1..1 at n=8, k=4, each with iterations(1).
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12318
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 1..8 at m=1, k=4, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12334
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 1..3 at m=1, n=8.
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12349
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 1..3 at m=1, n=8 with a_stride set to 7.
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12365
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 1..3, n over 1..8 and m over 1..1, each with iterations(1).
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12385
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 5..7 at m=1, n=8.
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12400
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 5..7 at m=1, n=8 with a_stride set to 11.
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12416
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 5..7, n over 1..8 and m over 1..1, each with iterations(1).
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12436
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 8..40 in steps of 4 at m=1, n=8.
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12451
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 8..40 in steps of 4 at m=1, n=8 with a_stride set to 43.
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
12467
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 8..40 in steps of 4, n over 1..8 and m over 1..1, each with iterations(1).
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12487
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=1.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12504
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=1 with cn_stride set to 11.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12522
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=1 with a_stride set to 23.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12540
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15, k over {1, 6, 11, 16} and m over 1..1, each with iterations(1).
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12560
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 6, 11, 16} at m=1.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12577
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 6, 11, 16} at m=1 with cn_stride set to 11.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12595
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 6, 11, 16} at m=1 with a_stride set to 23.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
12613
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24}, k over {1, 6, 11, 16} and m over 1..1, each with iterations(1).
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12633
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with cm_stride fixed at 11.
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12654
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12668
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12682
TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=4 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
12696 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12697
12698
12699 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12712
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12726
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1 with a_stride set to 3.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
12740
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 1..8 and m over 1..1 at k=1, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12758
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  // Sweep m over 1..1 at n=8, k=1, each with iterations(1).
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12774
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 1..8 at m=1, k=1, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12790
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 2..9 at m=1, n=8.
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12805
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 2..9 at m=1, n=8 with a_stride set to 11.
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
12821
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over 2..9, n over 1..8 and m over 1..1, each with iterations(1).
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12841
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 3, 5} at m=1.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12858
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 3, 5} at m=1 with cn_stride set to 11.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12876
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15 and k over {1, 3, 5} at m=1 with a_stride set to 7.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12894
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over 9..15, k over {1, 3, 5} and m over 1..1, each with iterations(1).
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12914
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 3, 5} at m=1.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12931
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 3, 5} at m=1 with cn_stride set to 11.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12949
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24} and k over {1, 3, 5} at m=1 with a_stride set to 7.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
12967
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep n over {16, 24}, k over {1, 3, 5} and m over 1..1, each with iterations(1).
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
12987
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  // Sweep k over {1, 3, 5}, n over 1..8 and m over 1..1 with cm_stride fixed at 11.
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13008
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13022
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13036
TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  // Single run at m=1, n=8, k=1 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
13050 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13051
13052
13053 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m=3, n=8, k=4.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13066
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m=3, n=8, k=4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13080
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m=3, n=8, k=4 with a_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13094
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 1..8 and m over 1..3 at k=4, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13112
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep m over 1..3 at n=8, k=4, each with iterations(1).
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13128
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 1..8 at m=3, k=4, each with iterations(1).
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13144
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 1..3 at m=3, n=8.
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13159
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 1..3 at m=3, n=8 with a_stride set to 7.
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13175
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 1..3, n over 1..8 and m over 1..3, each with iterations(1).
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13195
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 5..7 at m=3, n=8.
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13210
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 5..7 at m=3, n=8 with a_stride set to 11.
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13226
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 5..7, n over 1..8 and m over 1..3, each with iterations(1).
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13246
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 8..40 in steps of 4 at m=3, n=8.
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13261
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 8..40 in steps of 4 at m=3, n=8 with a_stride set to 43.
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
13277
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep k over 8..40 in steps of 4, n over 1..8 and m over 1..3, each with iterations(1).
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13297
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=3.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13314
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13332
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 9..15 and k over {1, 6, 11, 16} at m=3 with a_stride set to 23.
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13350
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over 9..15, k over {1, 6, 11, 16} and m over 1..3, each with iterations(1).
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13370
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over {16, 24} and k over {1, 6, 11, 16} at m=3.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13387
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep n over {16, 24} and k over {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13405
// 3x8__sse2_dup ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=3 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
13423
// 3x8__sse2_dup ukernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13443
// 3x8__sse2_dup ukernel: k in {1, 6, 11, 16}, every n in 1..8 and m in 1..3, with cm_stride(11);
// one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13464
// 3x8__sse2_dup ukernel: single m=3, n=8, k=4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13478
// 3x8__sse2_dup ukernel: single m=3, n=8, k=4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13492
// 3x8__sse2_dup ukernel: single m=3, n=8, k=4 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
13506 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13507
13508
13509 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13522
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13536
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13550
// 3x8s4__sse ukernel: k=4 with every n in 1..8 and m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13568
// 3x8s4__sse ukernel: k=4, n=8 with every m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13584
// 3x8s4__sse ukernel: k=4, m=3 with every n in 1..8; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13600
// 3x8s4__sse ukernel: k in 1..3 with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13615
// 3x8s4__sse ukernel: k in 1..3 with m=3, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13631
// 3x8s4__sse ukernel: k in 1..3 with every n in 1..8 and m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13651
// 3x8s4__sse ukernel: k in 5..7 with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13666
// 3x8s4__sse ukernel: k in 5..7 with m=3, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13682
// 3x8s4__sse ukernel: k in 5..7 with every n in 1..8 and m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13702
// 3x8s4__sse ukernel: k in 8, 12, ..., 40 with m=3, n=8.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13717
// 3x8s4__sse ukernel: k in 8, 12, ..., 40 with m=3, n=8 and a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
13733
// 3x8s4__sse ukernel: k in 8, 12, ..., 40 with every n in 1..8 and m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13753
// 3x8s4__sse ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=3.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13770
// 3x8s4__sse ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13788
// 3x8s4__sse ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=3 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13806
// 3x8s4__sse ukernel: n in 9..15, k in {1, 6, 11, 16}, every m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13826
// 3x8s4__sse ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=3.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13843
// 3x8s4__sse ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13861
// 3x8s4__sse ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=3 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
13879
// 3x8s4__sse ukernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in 1..3; one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13899
// 3x8s4__sse ukernel: k in {1, 6, 11, 16}, every n in 1..8 and m in 1..3, with cm_stride(11);
// one iteration per case.
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
13920
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13934
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13948
// 3x8s4__sse ukernel: single m=3, n=8, k=4 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
13962 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13963
13964
13965 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
13978
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
13992
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14006
// 4x8__sse_dup ukernel: k=4 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14024
// 4x8__sse_dup ukernel: k=4, n=8 with every m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14040
// 4x8__sse_dup ukernel: k=4, m=4 with every n in 1..8; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14056
// 4x8__sse_dup ukernel: k in 1..3 with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14071
// 4x8__sse_dup ukernel: k in 1..3 with m=4, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14087
// 4x8__sse_dup ukernel: k in 1..3 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14107
// 4x8__sse_dup ukernel: k in 5..7 with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14122
// 4x8__sse_dup ukernel: k in 5..7 with m=4, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14138
// 4x8__sse_dup ukernel: k in 5..7 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14158
// 4x8__sse_dup ukernel: k in 8, 12, ..., 40 with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14173
// 4x8__sse_dup ukernel: k in 8, 12, ..., 40 with m=4, n=8 and a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
14189
// 4x8__sse_dup ukernel: k in 8, 12, ..., 40 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14209
// 4x8__sse_dup ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14226
// 4x8__sse_dup ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14244
// 4x8__sse_dup ukernel: n in 9..15 and k in {1, 6, 11, 16} with m=4 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14262
// 4x8__sse_dup ukernel: n in 9..15, k in {1, 6, 11, 16}, every m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14282
// 4x8__sse_dup ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14299
// 4x8__sse_dup ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14317
// 4x8__sse_dup ukernel: n in {16, 24} and k in {1, 6, 11, 16} with m=4 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14335
// 4x8__sse_dup ukernel: n in {16, 24}, k in {1, 6, 11, 16}, every m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14355
// 4x8__sse_dup ukernel: k in {1, 6, 11, 16}, every n in 1..8 and m in 1..4, with cm_stride(11);
// one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14376
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14390
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14404
// 4x8__sse_dup ukernel: single m=4, n=8, k=4 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
14418 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14419
14420
14421 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x8__sse2_dup ukernel: single m=4, n=8, k=4 case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14434
// 4x8__sse2_dup ukernel: single m=4, n=8, k=4 case with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14448
// 4x8__sse2_dup ukernel: single m=4, n=8, k=4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14462
// 4x8__sse2_dup ukernel: k=4 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14480
// 4x8__sse2_dup ukernel: k=4, n=8 with every m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14496
// 4x8__sse2_dup ukernel: k=4, m=4 with every n in 1..8; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14512
// 4x8__sse2_dup ukernel: k in 1..3 with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14527
// 4x8__sse2_dup ukernel: k in 1..3 with m=4, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14543
// 4x8__sse2_dup ukernel: k in 1..3 with every n in 1..8 and m in 1..4; one iteration per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14563
// 4x8__sse2_dup ukernel: k in 5..7 with m=4, n=8.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14578
// 4x8__sse2_dup ukernel: k in 5..7 with m=4, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14594
// 4x8 SSE2 dup kernel: every (m, n) with m in [1, 4] and n in [1, 8], for each k in [5, 7]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14614
// 4x8 SSE2 dup kernel: m = 4, n = 8 for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14629
// 4x8 SSE2 dup kernel: m = 4, n = 8 for k = 8, 12, ..., 40, with a_stride(43) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
14645
// 4x8 SSE2 dup kernel: every (m, n) with m in [1, 4] and n in [1, 8], for k = 8, 12, ..., 40; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14665
// 4x8 SSE2 dup kernel: m = 4 with n in [9, 15], each at k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14682
// 4x8 SSE2 dup kernel: m = 4 with n in [9, 15], each at k = 1, 6, 11, 16, with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14700
// 4x8 SSE2 dup kernel: m = 4 with n in [9, 15], each at k = 1, 6, 11, 16, with a_stride(23) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14718
// 4x8 SSE2 dup kernel: n in [9, 15], k = 1, 6, 11, 16, and every m in [1, 4]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14738
// 4x8 SSE2 dup kernel: m = 4 with n = 16 and 24 (multiples of 8), each at k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14755
// 4x8 SSE2 dup kernel: m = 4 with n = 16 and 24, each at k = 1, 6, 11, 16, with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14773
// 4x8 SSE2 dup kernel: m = 4 with n = 16 and 24, each at k = 1, 6, 11, 16, with a_stride(23) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
14791
// 4x8 SSE2 dup kernel: n = 16 and 24, k = 1, 6, 11, 16, and every m in [1, 4]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14811
// 4x8 SSE2 dup kernel: k = 1, 6, 11, 16 with every (m, n), m in [1, 4] and n in [1, 8]; cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
14832
// 4x8 SSE2 dup kernel: m = 4, n = 8, k = 4 with qmin(128) set on the tester
// (presumably raises the lower output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14846
// 4x8 SSE2 dup kernel: m = 4, n = 8, k = 4 with qmax(128) set on the tester
// (presumably lowers the upper output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14860
// 4x8 SSE2 dup kernel: m = 4, n = 8, k = 4 with cm_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
14874 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14875
14876
14877 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8 AVX broadcast kernel: single case with m = 1, n = 8, k = 1.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
14890
// 1x8 AVX broadcast kernel: m = 1, n = 8, k = 1 with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
14904
// 1x8 AVX broadcast kernel: m = 1, n = 8, k = 1 with a_stride(3) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
14918
// 1x8 AVX broadcast kernel: k = 1 with every n in [1, 8]; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
14936
// 1x8 AVX broadcast kernel: n = 8, k = 1 while m is looped over [1, 1] (a single pass); iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
14952
// 1x8 AVX broadcast kernel: m = 1, k = 1 with every n in [1, 8]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
14968
// 1x8 AVX broadcast kernel: m = 1, n = 8 for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
14983
// 1x8 AVX broadcast kernel: m = 1, n = 8 for each k in [2, 9], with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
14999
// 1x8 AVX broadcast kernel: each k in [2, 9] with every n in [1, 8]; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15019
// 1x8 AVX broadcast kernel: m = 1 with n in [9, 15], each at k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15036
// 1x8 AVX broadcast kernel: m = 1 with n in [9, 15], each at k = 1, 3, 5, with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15054
// 1x8 AVX broadcast kernel: m = 1 with n in [9, 15], each at k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15072
// 1x8 AVX broadcast kernel: n in [9, 15] at k = 1, 3, 5; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15092
// 1x8 AVX broadcast kernel: m = 1 with n = 16 and 24 (multiples of 8), each at k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15109
// 1x8 AVX broadcast kernel: m = 1 with n = 16 and 24, each at k = 1, 3, 5, with cn_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15127
// 1x8 AVX broadcast kernel: m = 1 with n = 16 and 24, each at k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15145
// 1x8 AVX broadcast kernel: n = 16 and 24 at k = 1, 3, 5; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15165
// 1x8 AVX broadcast kernel: k = 1, 3, 5 with every n in [1, 8] and m = 1 (single-pass loop); cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15186
// 1x8 AVX broadcast kernel: m = 1, n = 8, k = 1 with qmin(128) set on the tester
// (presumably raises the lower output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15200
// 1x8 AVX broadcast kernel: m = 1, n = 8, k = 1 with qmax(128) set on the tester
// (presumably lowers the upper output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15214
// 1x8 AVX broadcast kernel: m = 1, n = 8, k = 1 with cm_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15228 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15229
15230
15231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x16 AVX broadcast kernel: single case with m = 1, n = 16, k = 1.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15244
// 1x16 AVX broadcast kernel: m = 1, n = 16, k = 1 with cn_stride(19) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15258
// 1x16 AVX broadcast kernel: m = 1, n = 16, k = 1 with a_stride(3) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15272
// 1x16 AVX broadcast kernel: k = 1 with every n in [1, 16]; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15290
// 1x16 AVX broadcast kernel: n = 16, k = 1 while m is looped over [1, 1] (a single pass); iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m).n(16).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15306
// 1x16 AVX broadcast kernel: m = 1, k = 1 with every n in [1, 16]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15322
// 1x16 AVX broadcast kernel: m = 1, n = 16 for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15337
// 1x16 AVX broadcast kernel: m = 1, n = 16 for each k in [2, 9], with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15353
// 1x16 AVX broadcast kernel: each k in [2, 9] with every n in [1, 16]; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15373
// 1x16 AVX broadcast kernel: m = 1 with n in [17, 31], each at k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15390
// 1x16 AVX broadcast kernel: m = 1 with n in [17, 31], each at k = 1, 3, 5, with cn_stride(19) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15408
// 1x16 AVX broadcast kernel: m = 1 with n in [17, 31], each at k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15426
// 1x16 AVX broadcast kernel: n in [17, 31] at k = 1, 3, 5; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15446
// 1x16 AVX broadcast kernel: m = 1 with n = 32 and 48 (multiples of 16), each at k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15463
// 1x16 AVX broadcast kernel: m = 1 with n = 32 and 48, each at k = 1, 3, 5, with cn_stride(19) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15481
// 1x16 AVX broadcast kernel: m = 1 with n = 32 and 48, each at k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15499
// 1x16 AVX broadcast kernel: n = 32 and 48 at k = 1, 3, 5; the m loop covers only m = 1. iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15519
// 1x16 AVX broadcast kernel: k = 1, 3, 5 with every n in [1, 16] and m = 1 (single-pass loop); cm_stride(19) and iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15540
// 1x16 AVX broadcast kernel: m = 1, n = 16, k = 1 with qmin(128) set on the tester
// (presumably raises the lower output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15554
// 1x16 AVX broadcast kernel: m = 1, n = 16, k = 1 with qmax(128) set on the tester
// (presumably lowers the upper output clamp; confirm in GemmMicrokernelTester).
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15568
// 1x16 AVX broadcast kernel: m = 1, n = 16, k = 1 with cm_stride(19) set on the tester.
TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(1)
    .cm_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15582 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15583
15584
15585 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x16 AVX broadcast kernel: single case with m = 4, n = 16, k = 1.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15598
// 4x16 AVX broadcast kernel: m = 4, n = 16, k = 1 with cn_stride(19) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15612
// 4x16 AVX broadcast kernel: m = 4, n = 16, k = 1 with a_stride(3) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
15626
// 4x16 AVX broadcast kernel: k = 1 with every (m, n), m in [1, 4] and n in [1, 16]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15644
// 4x16 AVX broadcast kernel: n = 16, k = 1 with every m in [1, 4]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m).n(16).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15660
// 4x16 AVX broadcast kernel: m = 4, k = 1 with every n in [1, 16]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15676
// 4x16 AVX broadcast kernel: m = 4, n = 16 for each k in [2, 9].
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15691
// 4x16 AVX broadcast kernel: m = 4, n = 16 for each k in [2, 9], with a_stride(11) set on the tester.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
15707
// 4x16 AVX broadcast kernel: each k in [2, 9] with every (m, n), m in [1, 4] and n in [1, 16]; iterations(1) per case.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15727
// 4x16 AVX broadcast kernel: m = 4 with n in [17, 31], each at k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
15744
// m=4, cn_stride set to 19; sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15762
// m=4, a_stride set to 7; sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15780
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15800
// m=4; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15817
// m=4, cn_stride set to 19; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15835
// m=4, a_stride set to 7; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15853
// n in {32, 48}, k in {1, 3, 5}, m swept over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15873
// cm_stride set to 19; sweeps k over {1, 3, 5}, n over 1..16 and m over 1..4,
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
15894
// m=4, n=16, k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15908
// m=4, n=16, k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15922
// m=4, n=16, k=1 with cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15936 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15937
15938
15939 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=mr=5, n=nr=8, k=1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15952
// m=5, n=8, k=1 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15966
// m=5, n=8, k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
15980
// k=1; sweeps n over 1..8 and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
15998
// k=1, n=8; sweeps m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16014
// k=1, m=5; sweeps n over 1..8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16030
// m=5, n=8; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16045
// m=5, n=8, a_stride set to 11; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16061
// Sweeps k over 2..9, n over 1..8 and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16081
// m=5; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16098
// m=5, cn_stride set to 11; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16116
// m=5, a_stride set to 7; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16134
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16154
// m=5; n in {16, 24} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16171
// m=5, cn_stride set to 11; n in {16, 24} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16189
// m=5, a_stride set to 7; n in {16, 24} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16207
// n in {16, 24}, k in {1, 3, 5}, m swept over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16227
// cm_stride set to 11; sweeps k over {1, 3, 5}, n over 1..8 and m over 1..5,
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16248
// m=5, n=8, k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16262
// m=5, n=8, k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16276
// m=5, n=8, k=1 with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16290 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16291
16292
16293 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=mr=5, n=nr=16, k=1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16306
// m=5, n=16, k=1 with cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16320
// m=5, n=16, k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16334
// k=1; sweeps n over 1..16 and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16352
// k=1, n=16; sweeps m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16368
// k=1, m=5; sweeps n over 1..16, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16384
// m=5, n=16; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(16).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16399
// m=5, n=16, a_stride set to 11; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(16).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16415
// Sweeps k over 2..9, n over 1..16 and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16435
// m=5; sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16452
// m=5, cn_stride set to 19; sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16470
// m=5, a_stride set to 7; sweeps n over 17..31 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16488
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim <= 31; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16508
// m=5; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16525
// m=5, cn_stride set to 19; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16543
// m=5, a_stride set to 7; n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16561
// n in {32, 48}, k in {1, 3, 5}, m swept over 1..5, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16581
// cm_stride set to 19; sweeps k over {1, 3, 5}, n over 1..16 and m over 1..5,
// with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16602
// m=5, n=16, k=1 with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16616
// m=5, n=16, k=1 with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16630
// m=5, n=16, k=1 with cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16644 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16645
16646
16647 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=mr=6, n=nr=8, k=1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16660
// m=6, n=8, k=1 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16674
// m=6, n=8, k=1 with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
16688
// k=1; sweeps n over 1..8 and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16706
// k=1, n=8; sweeps m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16722
// k=1, m=6; sweeps n over 1..8, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16738
// m=6, n=8; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16753
// m=6, n=8, a_stride set to 11; sweeps k over 2..9.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
16769
// Sweeps k over 2..9, n over 1..8 and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16789
// m=6; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16806
// m=6, cn_stride set to 11; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16824
// m=6, a_stride set to 7; sweeps n over 9..15 and k over {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16842
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..6, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16862
// m=6; n in {16, 24} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
16879
// 6x8 AVX broadcast kernel: n = 16 and 24, k = 1, 3, 5, m = 6, cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16897
// 6x8 AVX broadcast kernel: n = 16 and 24, k = 1, 3, 5, m = 6, a_stride(7).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
16915
// 6x8 AVX broadcast kernel: n = 16 and 24, k = 1, 3, 5, every m in 1..6, iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16935
// 6x8 AVX broadcast kernel: k = 1, 3, 5, every n in 1..8 and m in 1..6, cm_stride(11), iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
16956
// 6x8 AVX broadcast kernel: single m = 6, n = 8, k = 1 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16970
// 6x8 AVX broadcast kernel: single m = 6, n = 8, k = 1 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16984
// 6x8 AVX broadcast kernel: single m = 6, n = 8, k = 1 case with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
16998 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16999
17000
17001 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17014
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case with cn_stride(19).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17028
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17042
// 1x16s4 FMA3 broadcast kernel: k = 4, every n in 1..16 and m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17060
// 1x16s4 FMA3 broadcast kernel: k = 4, n = 16, every m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(m).n(16).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17076
// 1x16s4 FMA3 broadcast kernel: k = 4, m = 1, every n in 1..16, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17092
// 1x16s4 FMA3 broadcast kernel: k = 1..3, m = 1, n = 16.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17107
// 1x16s4 FMA3 broadcast kernel: k = 1..3, m = 1, n = 16, a_stride(7).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17123
// 1x16s4 FMA3 broadcast kernel: k = 1..3, every n in 1..16 and m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17143
// 1x16s4 FMA3 broadcast kernel: k = 5..7, m = 1, n = 16.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17158
// 1x16s4 FMA3 broadcast kernel: k = 5..7, m = 1, n = 16, a_stride(11).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17174
// 1x16s4 FMA3 broadcast kernel: k = 5..7, every n in 1..16 and m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17194
// 1x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, m = 1, n = 16.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17209
// 1x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, m = 1, n = 16, a_stride(43).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(4)
      .m(1).n(16).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17225
// 1x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, every n in 1..16 and m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17245
// 1x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 1.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17262
// 1x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 1, cn_stride(19).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17280
// 1x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 1, a_stride(23).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17298
// 1x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, every m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17318
// 1x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 1.
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17335
// 1x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 1, cn_stride(19).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17353
// 1x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 1, a_stride(23).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(4)
        .m(1).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17371
// 1x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, every m in 1..1, iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17391
// 1x16s4 FMA3 broadcast kernel: k = 1, 6, 11, 16, every n in 1..16 and m in 1..1, cm_stride(19), iterations(1).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17412
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17426
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17440
// 1x16s4 FMA3 broadcast kernel: single m = 1, n = 16, k = 4 case with cm_stride(19).
TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(4)
    .m(1).n(16).k(4)
    .cm_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17454 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17455
17456
17457 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17470
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case with cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17484
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17498
// 3x16s4 FMA3 broadcast kernel: k = 4, every n in 1..16 and m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17516
// 3x16s4 FMA3 broadcast kernel: k = 4, n = 16, every m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(m).n(16).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17532
// 3x16s4 FMA3 broadcast kernel: k = 4, m = 3, every n in 1..16, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17548
// 3x16s4 FMA3 broadcast kernel: k = 1..3, m = 3, n = 16.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17563
// 3x16s4 FMA3 broadcast kernel: k = 1..3, m = 3, n = 16, a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17579
// 3x16s4 FMA3 broadcast kernel: k = 1..3, every n in 1..16 and m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17599
// 3x16s4 FMA3 broadcast kernel: k = 5..7, m = 3, n = 16.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17614
// 3x16s4 FMA3 broadcast kernel: k = 5..7, m = 3, n = 16, a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17630
// 3x16s4 FMA3 broadcast kernel: k = 5..7, every n in 1..16 and m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17650
// 3x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, m = 3, n = 16.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17665
// 3x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, m = 3, n = 16, a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(4)
      .m(3).n(16).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17681
// 3x16s4 FMA3 broadcast kernel: k = 8, 12, ..., 40, every n in 1..16 and m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17701
// 3x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 3.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17718
// 3x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 3, cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17736
// 3x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, m = 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17754
// 3x16s4 FMA3 broadcast kernel: n = 17..31, k = 1, 6, 11, 16, every m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17774
// 3x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 3.
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17791
// 3x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 3, cn_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17809
// 3x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, m = 3, a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17827
// 3x16s4 FMA3 broadcast kernel: n = 32 and 48, k = 1, 6, 11, 16, every m in 1..3, iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17847
// 3x16s4 FMA3 broadcast kernel: k = 1, 6, 11, 16, every n in 1..16 and m in 1..3, cm_stride(19), iterations(1).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
17868
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17882
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17896
// 3x16s4 FMA3 broadcast kernel: single m = 3, n = 16, k = 4 case with cm_stride(19).
TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(4)
    .m(3).n(16).k(4)
    .cm_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17910 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17911
17912
17913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x16 FMA3 broadcast kernel: single m = 4, n = 16, k = 1 case.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17926
// 4x16 FMA3 broadcast kernel: single m = 4, n = 16, k = 1 case with cn_stride(19).
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .cn_stride(19)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17940
// 4x16 FMA3 broadcast kernel: single m = 4, n = 16, k = 1 case with a_stride(3).
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
17954
// 4x16 FMA3 broadcast kernel: k = 1, every n in 1..16 and m in 1..4, iterations(1).
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
17972
// 4x16 FMA3 broadcast kernel: k = 1, n = 16, every m in 1..4, iterations(1).
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m).n(16).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
17988
// k = 1 and m = 4 while n is swept over [1, 16]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18004
// Full 4x16 problem with k swept over [2, 9].
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18019
// Full 4x16 problem with k swept over [2, 9] and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18035
// k in [2, 9] crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18055
// m = 4 with n in [17, 31] and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18072
// m = 4 with n in [17, 31] and k in {1, 3, 5}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18090
// m = 4 with n in [17, 31] and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18108
// n in [17, 31], k in {1, 3, 5} and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18128
// m = 4 with n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18145
// m = 4 with n in {32, 48} and k in {1, 3, 5}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18163
// m = 4 with n in {32, 48} and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n).k(k)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18181
// n in {32, 48}, k in {1, 3, 5} and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18201
// k in {1, 3, 5}, n in [1, 16] and m in [1, 4] with cm_stride set to 19; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18222
// Full 4x16 problem with k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18236
// Full 4x16 problem with k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18250
// Full 4x16 problem with k = 1 and cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18264 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18265
18266
18267 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 4x16s4 FMA3-broadcast kernel on a full 4x16 problem with k = 4.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18280
// Full 4x16 problem with k = 4 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18294
// Full 4x16 problem with k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18308
// k = 4 for every (n, m) pair with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18326
// k = 4 and n = 16 while m is swept over [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(m).n(16).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18342
// k = 4 and m = 4 while n is swept over [1, 16]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18358
// Full 4x16 problem with k swept over [1, 3].
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18373
// Full 4x16 problem with k swept over [1, 3] and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18389
// k in [1, 3] crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18409
// Full 4x16 problem with k swept over [5, 7].
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18424
// Full 4x16 problem with k swept over [5, 7] and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18440
// k in [5, 7] crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18460
// Full 4x16 problem with k stepping from 8 to 40 in increments of 4.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18475
// Full 4x16 problem with k stepping from 8 to 40 in increments of 4; a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(4)
        .m(4).n(16).k(k)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18491
// k from 8 to 40 in steps of 4, crossed with n in [1, 16] and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18511
// m = 4 with n in [17, 31] and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18528
// m = 4 with n in [17, 31] and k in {1, 6, 11, 16}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18546
// m = 4 with n in [17, 31] and k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18564
// n in [17, 31], k in {1, 6, 11, 16} and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18584
// m = 4 with n in {32, 48} and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18601
// m = 4 with n in {32, 48} and k in {1, 6, 11, 16}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18619
// m = 4 with n in {32, 48} and k in {1, 6, 11, 16}; a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(4)
          .m(4).n(n).k(k)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18637
// n in {32, 48}, k in {1, 6, 11, 16} and m in [1, 4]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18657
// k in {1, 6, 11, 16}, n in [1, 16] and m in [1, 4] with cm_stride set to 19; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(4)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18678
// Full 4x16 problem with k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18692
// Full 4x16 problem with k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18706
// Full 4x16 problem with k = 4 and cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(4)
      .m(4).n(16).k(4)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18720 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18721
18722
18723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 5x16 FMA3-broadcast kernel on a full 5x16 problem with k = 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18736
// Full 5x16 problem with k = 1 and cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18750
// Full 5x16 problem with k = 1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
18764
// k = 1 for every (n, m) pair with n in [1, 16] and m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 5; ++m) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18782
// k = 1 and n = 16 while m is swept over [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 5; ++m) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(m).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18798
// k = 1 and m = 5 while n is swept over [1, 16]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18814
// Full 5x16 problem with k swept over [2, 9].
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(16).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18829
// Full 5x16 problem with k swept over [2, 9] and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(16).k(k)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
18845
// k in [2, 9] crossed with n in [1, 16] and m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18865
// m = 5 with n in [17, 31] and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18882
// m = 5 with n in [17, 31] and k in {1, 3, 5}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18900
// m = 5 with n in [17, 31] and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18918
// n in [17, 31], k in {1, 3, 5} and m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
18938
// m = 5 with n in {32, 48} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18955
// m = 5 with n in {32, 48} and k in {1, 3, 5}; cn_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18973
// m = 5 with n in {32, 48} and k in {1, 3, 5}; a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(5).n(n).k(k)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18991
// n in {32, 48}, k in {1, 3, 5} and m in [1, 5]; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19011
// k in {1, 3, 5}, n in [1, 16] and m in [1, 5] with cm_stride set to 19; iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 5; ++m) {
        GemmMicrokernelTester()
            .mr(5).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19032
// Full 5x16 problem with k = 1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
19046
// Full 5x16 problem with k = 1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
19060
// Full 5x16 problem with k = 1 and cm_stride set to 19.
TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
19074 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19075
19076
19077 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 5x16s4 FMA3-broadcast kernel on a full 5x16 problem with k = 4.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
19090
// Full 5x16 tile with k = 4 and cn_stride(19) applied to the tester.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(4);
  tester.m(5).n(16).k(4);
  tester.cn_stride(19);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19104
// Full 5x16 tile with k = 4 and a_stride(7) applied to the tester.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(4);
  tester.m(5).n(16).k(4);
  tester.a_stride(7);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19118
// Sweeps every (m, n) with 1 <= m <= 5 and 1 <= n <= 16 at k = 4,
// with iterations set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19136
// Sweeps m from 1 to 5 with n fixed at 16 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(m_dim).n(16).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19152
// Sweeps n from 1 to 16 with m fixed at 5 and k = 4, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19168
// Full 5x16 tile for every k in [1, 4).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19183
// Full 5x16 tile for every k in [1, 4), with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19199
// For every k in [1, 4), sweeps every (m, n) with 1 <= m <= 5 and
// 1 <= n <= 16, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19219
// Full 5x16 tile for every k in [5, 8).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19234
// Full 5x16 tile for every k in [5, 8), with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19250
// For every k in [5, 8), sweeps every (m, n) with 1 <= m <= 5 and
// 1 <= n <= 16, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19270
// Full 5x16 tile for k = 8, 12, ..., 40 (step 4).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19285
// Full 5x16 tile for k = 8, 12, ..., 40 (step 4), with a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(4)
      .m(5).n(16).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19301
// For k = 8, 12, ..., 40 (step 4), sweeps every (m, n) with 1 <= m <= 5 and
// 1 <= n <= 16, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19321
// m = 5 with every n in [17, 32) and k = 1, 6, 11, 16 (step 5 up to 20).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19338
// m = 5 with every n in [17, 32) and k = 1, 6, 11, 16, using cn_stride(19).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19356
// m = 5 with every n in [17, 32) and k = 1, 6, 11, 16, using a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19374
// Every n in [17, 32), k = 1, 6, 11, 16 and m from 1 to 5,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19394
// m = 5 with n = 32 and 48 (step 16) and k = 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19411
// m = 5 with n = 32 and 48 (step 16) and k = 1, 6, 11, 16,
// using cn_stride(19).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19429
// m = 5 with n = 32 and 48 (step 16) and k = 1, 6, 11, 16,
// using a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19447
// n = 32 and 48 (step 16), k = 1, 6, 11, 16 and m from 1 to 5,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19467
// k = 1, 6, 11, 16 with every (m, n), 1 <= m <= 5 and 1 <= n <= 16,
// using cm_stride(19) and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19488
// Full 5x16 tile with k = 4 and qmin(128) applied to the tester.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(4);
  tester.m(5).n(16).k(4);
  tester.qmin(128);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19502
// Full 5x16 tile with k = 4 and qmax(128) applied to the tester.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(4);
  tester.m(5).n(16).k(4);
  tester.qmax(128);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19516
// Full 5x16 tile with k = 4 and cm_stride(19) applied to the tester.
TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(4);
  tester.m(5).n(16).k(4);
  tester.cm_stride(19);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19530 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19531
19532
19533 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 6x8 tile with k = 1 and no extra strides or clamping settings.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19546
// Full 6x8 tile with k = 1 and cn_stride(11) applied to the tester.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19560
// Full 6x8 tile with k = 1 and a_stride(3) applied to the tester.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19574
// Sweeps every (m, n) with 1 <= m <= 6 and 1 <= n <= 8 at k = 1,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19592
// Sweeps m from 1 to 6 with n fixed at 8 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19608
// Sweeps n from 1 to 8 with m fixed at 6 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19624
// Full 6x8 tile for every k in [2, 10).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19639
// Full 6x8 tile for every k in [2, 10), with a_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19655
// For every k in [2, 10), sweeps every (m, n) with 1 <= m <= 6 and
// 1 <= n <= 8, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19675
// m = 6 with every n in [9, 16) and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19692
// m = 6 with every n in [9, 16) and k = 1, 3, 5, using cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19710
// m = 6 with every n in [9, 16) and k = 1, 3, 5, using a_stride(7).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19728
// Every n in [9, 16), k = 1, 3, 5 and m from 1 to 6, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19748
// m = 6 with n = 16 and 24 (step 8) and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19765
// m = 6 with n = 16 and 24 (step 8) and k = 1, 3, 5, using cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19783
// m = 6 with n = 16 and 24 (step 8) and k = 1, 3, 5, using a_stride(7).
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19801
// n = 16 and 24 (step 8), k = 1, 3, 5 and m from 1 to 6,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19821
// k = 1, 3, 5 with every (m, n), 1 <= m <= 6 and 1 <= n <= 8,
// using cm_stride(11) and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19842
// Full 6x8 tile with k = 1 and qmin(128) applied to the tester.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19856
// Full 6x8 tile with k = 1 and qmax(128) applied to the tester.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19870
// Full 6x8 tile with k = 1 and cm_stride(11) applied to the tester.
TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19884 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19885
19886
19887 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 7x8 tile with k = 1 and no extra strides or clamping settings.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19900
// Full 7x8 tile with k = 1 and cn_stride(11) applied to the tester.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19914
// Full 7x8 tile with k = 1 and a_stride(3) applied to the tester.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
19928
// Sweeps every (m, n) with 1 <= m <= 7 and 1 <= n <= 8 at k = 1,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
19946
// Sweeps m from 1 to 7 with n fixed at 8 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19962
// Sweeps n from 1 to 8 with m fixed at 7 and k = 1, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19978
// Full 7x8 tile for every k in [2, 10).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
19993
// Full 7x8 tile for every k in [2, 10), with a_stride(11).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(7).nr(8).kr(1).sr(1)
      .m(7).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
            xnn_init_f32_minmax_avx_params);
  }
}
20009
// For every k in [2, 10), sweeps every (m, n) with 1 <= m <= 7 and
// 1 <= n <= 8, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20029
// m = 7 with every n in [9, 16) and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20046
// m = 7 with every n in [9, 16) and k = 1, 3, 5, using cn_stride(11).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20064
// m = 7 with every n in [9, 16) and k = 1, 3, 5, using a_stride(7).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20082
// Every n in [9, 16), k = 1, 3, 5 and m from 1 to 7, iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20102
// m = 7 with n = 16 and 24 (step 8) and k = 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20119
// m = 7 with n = 16 and 24 (step 8) and k = 1, 3, 5, using cn_stride(11).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20137
// m = 7 with n = 16 and 24 (step 8) and k = 1, 3, 5, using a_stride(7).
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(7).nr(8).kr(1).sr(1)
        .m(7).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
              xnn_init_f32_minmax_avx_params);
    }
  }
}
20155
// n = 16 and 24 (step 8), k = 1, 3, 5 and m from 1 to 7,
// iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20175
// k = 1, 3, 5 with every (m, n), 1 <= m <= 7 and 1 <= n <= 8,
// using cm_stride(11) and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(7).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
                xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20196
// Full 7x8 tile with k = 1 and qmin(128) applied to the tester.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast,
      xnn_init_f32_minmax_avx_params);
}
20210
// Single run: m=7, n=8, k=1 with qmax=128.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .qmax(128)
    .Test(ukernel, init_params);
}
20224
// Single run: m=7, n=8, k=1 with cm_stride=11.
TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(7).nr(8).kr(1).sr(1)
    .m(7).n(8).k(1)
    .cm_stride(11)
    .Test(ukernel, init_params);
}
20238 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20239
20240
20241 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m=8, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .Test(ukernel, init_params);
}
20254
// Single run: m=8, n=8, k=1 with cn_stride=11.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cn_stride(11)
    .Test(ukernel, init_params);
}
20268
// Single run: m=8, n=8, k=1 with a_stride=3.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .a_stride(3)
    .Test(ukernel, init_params);
}
20282
// Sweeps n over [1, 8] and m over [1, 8] at k=1, one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(ukernel, init_params);
    }
  }
}
20300
// Sweeps m over [1, 8] with n=8 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
20316
// Sweeps n over [1, 8] with m=8 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(n_size).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
20332
// Sweeps k over [2, 9] with m=8 and n=8.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(k_size)
      .Test(ukernel, init_params);
  }
}
20347
// Sweeps k over [2, 9] with m=8, n=8 and a_stride=11.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(k_size)
      .a_stride(11)
      .Test(ukernel, init_params);
  }
}
20363
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 8], one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20383
// Sweeps n over [9, 15] and k over {1, 3, 5} with m=8.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
20400
// Sweeps n over [9, 15] and k over {1, 3, 5} with m=8 and cn_stride=11.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(ukernel, init_params);
    }
  }
}
20418
// Sweeps n over [9, 15] and k over {1, 3, 5} with m=8 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
20436
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 8], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20456
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=8.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
20473
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=8 and cn_stride=11.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(ukernel, init_params);
    }
  }
}
20491
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=8 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_a) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
20509
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 8], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20529
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 8] with cm_stride=11,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 8; ++m_size) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20550
// Single run: m=8, n=8, k=1 with qmin=128.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmin(128)
    .Test(ukernel, init_params);
}
20564
// Single run: m=8, n=8, k=1 with qmax=128.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmax(128)
    .Test(ukernel, init_params);
}
20578
// Single run: m=8, n=8, k=1 with cm_stride=11.
TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast;
  const auto init_params = xnn_init_f32_minmax_avx_params;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cm_stride(11)
    .Test(ukernel, init_params);
}
20592 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20593
20594
20595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m=4, n=16, k=1.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .Test(ukernel, init_params);
}
20608
// Single run: m=4, n=16, k=1 with cn_stride=19.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .cn_stride(19)
    .Test(ukernel, init_params);
}
20622
// Single run: m=4, n=16, k=1 with a_stride=3.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .a_stride(3)
    .Test(ukernel, init_params);
}
20636
// Sweeps n over [1, 16] and m over [1, 4] at k=1, one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(ukernel, init_params);
    }
  }
}
20654
// Sweeps m over [1, 4] with n=16 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
20670
// Sweeps n over [1, 16] with m=4 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
20686
// Sweeps k over [2, 9] with m=4 and n=16.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(ukernel, init_params);
  }
}
20701
// Sweeps k over [2, 9] with m=4, n=16 and a_stride=11.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_stride(11)
      .Test(ukernel, init_params);
  }
}
20717
// Sweeps k over [2, 9], n over [1, 16] and m over [1, 4], one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20737
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=4.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
20754
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=4 and cn_stride=19.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(ukernel, init_params);
    }
  }
}
20772
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=4 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
20790
// Sweeps n over [17, 31], k over {1, 3, 5} and m over [1, 4], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20810
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=4.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
20827
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=4 and cn_stride=19.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(ukernel, init_params);
    }
  }
}
20845
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=4 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
20863
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over [1, 4], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20883
// Sweeps k over {1, 3, 5}, n over [1, 16] and m over [1, 4] with
// cm_stride=19, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
20904
// Single run: m=4, n=16, k=1 with qmin=128.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .qmin(128)
    .Test(ukernel, init_params);
}
20918
// Single run: m=4, n=16, k=1 with qmax=128.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .qmax(128)
    .Test(ukernel, init_params);
}
20932
// Single run: m=4, n=16, k=1 with cm_stride=19.
TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(1)
    .cm_stride(19)
    .Test(ukernel, init_params);
}
20946 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20947
20948
20949 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m=5, n=16, k=1.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .Test(ukernel, init_params);
}
20962
// Single run: m=5, n=16, k=1 with cn_stride=19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .cn_stride(19)
    .Test(ukernel, init_params);
}
20976
// Single run: m=5, n=16, k=1 with a_stride=3.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .a_stride(3)
    .Test(ukernel, init_params);
}
20990
// Sweeps n over [1, 16] and m over [1, 5] at k=1, one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(ukernel, init_params);
    }
  }
}
21008
// Sweeps m over [1, 5] with n=16 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
21024
// Sweeps n over [1, 16] with m=5 and k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(n_size).k(1)
      .iterations(1)
      .Test(ukernel, init_params);
  }
}
21040
// Sweeps k over [2, 9] with m=5 and n=16.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(k_size)
      .Test(ukernel, init_params);
  }
}
21055
// Sweeps k over [2, 9] with m=5, n=16 and a_stride=11.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(k_size)
      .a_stride(11)
      .Test(ukernel, init_params);
  }
}
21071
// Sweeps k over [2, 9], n over [1, 16] and m over [1, 5], one iteration per
// configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
21091
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=5.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
21108
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=5 and cn_stride=19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(ukernel, init_params);
    }
  }
}
21126
// Sweeps n over [17, 31] and k over {1, 3, 5} with m=5 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
21144
// Sweeps n over [17, 31], k over {1, 3, 5} and m over [1, 5], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
21164
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=5.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(ukernel, init_params);
    }
  }
}
21181
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=5 and cn_stride=19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(ukernel, init_params);
    }
  }
}
21199
// Sweeps n over {32, 48} and k over {1, 3, 5} with m=5 and a_stride=7.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(16).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .a_stride(7)
        .Test(ukernel, init_params);
    }
  }
}
21217
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over [1, 5], one iteration
// per configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
21237
// Sweeps k over {1, 3, 5}, n over [1, 16] and m over [1, 5] with
// cm_stride=19, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(ukernel, init_params);
      }
    }
  }
}
21258
// Single run: m=5, n=16, k=1 with qmin=128.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .qmin(128)
    .Test(ukernel, init_params);
}
21272
// Single run: m=5, n=16, k=1 with qmax=128.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .qmax(128)
    .Test(ukernel, init_params);
}
21286
// Single run: m=5, n=16, k=1 with cm_stride=19.
TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast;
  const auto init_params = xnn_init_f32_minmax_scalar_params;
  GemmMicrokernelTester()
    .mr(5).nr(16).kr(1).sr(1)
    .m(5).n(16).k(1)
    .cm_stride(19)
    .Test(ukernel, init_params);
}
21300 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21301
21302
21303 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run: m=1, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  const auto ukernel = xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat;
  const auto init_params = xnn_init_f32_minmax_wasmsimd_params;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(ukernel, init_params);
}
21315
// Single run with m=1, n=8, k=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
21328
// Single run with m=1, n=8, k=1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
21341
// Sweeps n over 1..8 and m over 1..1 at k=1, one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21358
// Sweeps m over 1..1 with n=8, k=1, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21373
// Sweeps n over 1..8 with m=1, k=1, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21388
// Sweeps k over 2..9 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21402
// Sweeps k over 2..9 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21417
// Sweeps k over 2..9, n over 1..8 and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21436
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21452
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21469
// Sweeps n over 9..15 and k over {1, 3, 5} with m=1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21486
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21505
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21521
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21538
// Sweeps n over {16, 24} and k over {1, 3, 5} with m=1 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21555
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21574
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with cm_stride fixed
// at 11; every combination is run with a single iteration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21594
// Single run with m=1, n=8, k=1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
21607
// Single run with m=1, n=8, k=1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
21620
// Single run with m=1, n=8, k=1 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
21633 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21634
21635
21636 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=1, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21648
// Single run with m=1, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21661
// Single run with m=1, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
21674
// Sweeps n over 1..8 and m over 1..1 at k=4, one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21691
// Sweeps m over 1..1 with n=8, k=4, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21706
// Sweeps n over 1..8 with m=1, k=4, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21721
// Sweeps k over 1..3 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21735
// Sweeps k over 1..3 with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21750
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21769
// Sweeps k over 5..7 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21783
// Sweeps k over 5..7 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21798
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21817
// Sweeps k over 8, 12, ..., 40 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21831
// Sweeps k over 8, 12, ..., 40 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
21846
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration
// per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21865
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21881
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21898
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21915
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..1, one iteration
// per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
21934
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21950
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21967
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
21984
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..1, one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22003
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with cm_stride
// fixed at 11; every combination is run with a single iteration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22023
// Single run with m=1, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
22036
// Single run with m=1, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
22049
// Single run with m=1, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
}
22062 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22063
22064
22065 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=1, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22077
// Single run with m=1, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22090
// Single run with m=1, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
22103
// Sweeps n over 1..8 and m over 1..1 at k=4, one iteration per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22120
// Sweeps m over 1..1 with n=8, k=4, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22135
// Sweeps n over 1..8 with m=1, k=4, one iteration per value.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22150
// Sweeps k over 1..3 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22164
// Sweeps k over 1..3 with m=1, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22179
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22198
// Sweeps k over 5..7 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22212
// Sweeps k over 5..7 with m=1, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22227
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration per
// combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22246
// Sweeps k over 8, 12, ..., 40 with m=1, n=8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22260
// Sweeps k over 8, 12, ..., 40 with m=1, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
22275
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration
// per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22294
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22310
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22327
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22344
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..1, one iteration
// per combination.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22363
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22379
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22396
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m=1 and a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22413
// Sweeps n over {16, 24} and k over {1, 6, 11, 16}; the m loop runs once (m = 1).
// Every case is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22432
// Sweeps k over {1, 6, 11, 16} and n over [1, 8]; the m loop runs once (m = 1).
// Every case uses cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22452
// Single case: m = 1, n = 8, k = 4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
22465
// Single case: m = 1, n = 8, k = 4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
22478
// Single case: m = 1, n = 8, k = 4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
22491 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22492
22493
22494 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m = 3, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22506
// Single case: m = 3, n = 8, k = 4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22519
// Single case: m = 3, n = 8, k = 4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22532
// Sweeps n over [1, 8] and m over [1, 3] at k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22549
// Sweeps m over [1, 3] at n = 8, k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22564
// Sweeps n over [1, 8] at m = 3, k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(nc).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22579
// Sweeps k over [1, 4) at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22593
// Sweeps k over [1, 4) at m = 3, n = 8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22608
// Sweeps k over [1, 4), n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22627
// Sweeps k over [5, 8) at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22641
// Sweeps k over [5, 8) at m = 3, n = 8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22656
// Sweeps k over [5, 8), n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22675
// Sweeps k over 8, 12, ..., 40 at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22689
// Sweeps k over 8, 12, ..., 40 at m = 3, n = 8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22704
// Sweeps k over 8, 12, ..., 40, n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22723
// Sweeps n over [9, 16) and k over {1, 6, 11, 16} with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22739
// Sweeps n over [9, 16) and k over {1, 6, 11, 16} with m = 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22756
// Sweeps n over [9, 16) and k over {1, 6, 11, 16} with m = 3 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22773
// Sweeps n over [9, 16), k over {1, 6, 11, 16} and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22792
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22808
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m = 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22825
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m = 3 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22842
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22861
// Sweeps k over {1, 6, 11, 16}, n over [1, 8] and m over [1, 3].
// Every case uses cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
22881
// Single case: m = 3, n = 8, k = 4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22894
// Single case: m = 3, n = 8, k = 4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22907
// Single case: m = 3, n = 8, k = 4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
22920 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22921
22922
22923 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m = 3, n = 8, k = 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
22935
// Single case: m = 3, n = 8, k = 1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
22948
// Single case: m = 3, n = 8, k = 1 with a_stride(3).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
22961
// Sweeps n over [1, 8] and m over [1, 3] at k = 1; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
22978
// Sweeps m over [1, 3] at n = 8, k = 1; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
22993
// Sweeps n over [1, 8] at m = 3, k = 1; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(nc).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23008
// Sweeps k over [2, 10) at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23022
// Sweeps k over [2, 10) at m = 3, n = 8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23037
// Sweeps k over [2, 10), n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23056
// Sweeps n over [9, 16) and k over {1, 3, 5} with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23072
// Sweeps n over [9, 16) and k over {1, 3, 5} with m = 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23089
// Sweeps n over [9, 16) and k over {1, 3, 5} with m = 3 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23106
// Sweeps n over [9, 16), k over {1, 3, 5} and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23125
// Sweeps n over {16, 24} and k over {1, 3, 5} with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23141
// Sweeps n over {16, 24} and k over {1, 3, 5} with m = 3 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23158
// Sweeps n over {16, 24} and k over {1, 3, 5} with m = 3 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23175
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23194
// Sweeps k over {1, 3, 5}, n over [1, 8] and m over [1, 3].
// Every case uses cm_stride(11) and iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23214
// Single case: m = 3, n = 8, k = 1 with qmin(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
23227
// Single case: m = 3, n = 8, k = 1 with qmax(128).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
23240
// Single case: m = 3, n = 8, k = 1 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
23253 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23254
23255
23256 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single case: m = 3, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23268
// Single case: m = 3, n = 8, k = 4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23281
// Single case: m = 3, n = 8, k = 4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23294
// Sweeps n over [1, 8] and m over [1, 3] at k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23311
// Sweeps m over [1, 3] at n = 8, k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23326
// Sweeps n over [1, 8] at m = 3, k = 4; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(nc).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23341
// Sweeps k over [1, 4) at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23355
// Sweeps k over [1, 4) at m = 3, n = 8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23370
// Sweeps k over [1, 4), n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23389
// Sweeps k over [5, 8) at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23403
// Sweeps k over [5, 8) at m = 3, n = 8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23418
// Sweeps k over [5, 8), n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23437
// Sweeps k over 8, 12, ..., 40 at m = 3, n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23451
// Sweeps k over 8, 12, ..., 40 at m = 3, n = 8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23466
// Sweeps k over 8, 12, ..., 40, n over [1, 8] and m over [1, 3]; every case uses iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23485
// Sweeps n over [9, 16) and k over {1, 6, 11, 16} with m fixed at 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23501
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23518
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=3 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23535
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over 1..3, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23554
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23570
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23587
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=3 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23604
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over 1..3, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23623
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over 1..3 with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23643
// Single run with m=3, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23656
// Single run with m=3, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23669
// Single run with m=3, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23682 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23683
23684
23685 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=4, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23697
// Single run with m=4, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23710
// Single run with m=4, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
23723
// Runs every n in 1..8 against every m in 1..4 at k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23740
// Sweeps m over 1..4 with n=8 and k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23755
// Sweeps n over 1..8 with m=4 and k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_val).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23770
// Sweeps k over 1..3 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23784
// Sweeps k over 1..3 with m=4, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23799
// Sweeps k over 1..3 against every n in 1..8 and m in 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23818
// Sweeps k over 5..7 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23832
// Sweeps k over 5..7 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23847
// Sweeps k over 5..7 against every n in 1..8 and m in 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23866
// Sweeps k over 8, 12, ..., 40 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23880
// Sweeps k over 8, 12, ..., 40 with m=4, n=8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
23895
// Sweeps k over 8, 12, ..., 40 against every n in 1..8 and m in 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23914
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23930
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23947
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m=4 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23964
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
23983
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
23999
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24016
// Sweeps n over 16 and 24 and k over 1, 6, 11, 16 with m=4 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24033
// Sweeps n over 16 and 24, k over 1, 6, 11, 16 and m over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24052
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over 1..4 with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24072
// Single run with m=4, n=8, k=4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24085
// Single run with m=4, n=8, k=4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24098
// Single run with m=4, n=8, k=4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24111 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24112
24113
24114 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=4, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24126
// Single run with m=4, n=8, k=1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24139
// Single run with m=4, n=8, k=1 and a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24152
// Runs every n in 1..8 against every m in 1..4 at k=1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24169
// Sweeps m over 1..4 with n=8 and k=1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24184
// Sweeps n over 1..8 with m=4 and k=1, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_val).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24199
// Sweeps k over 2..9 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24213
// Sweeps k over 2..9 with m=4, n=8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24228
// Sweeps k over 2..9 against every n in 1..8 and m in 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24247
// Sweeps n over 9..15 and k over 1, 3, 5 with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24263
// Sweeps n over 9..15 and k over 1, 3, 5 with m=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24280
// Sweeps n over 9..15 and k over 1, 3, 5 with m=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24297
// Sweeps n over 9..15, k over 1, 3, 5 and m over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24316
// Sweeps n over 16 and 24 and k over 1, 3, 5 with m=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24332
// Sweeps n over 16 and 24 and k over 1, 3, 5 with m=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24349
// Sweeps n over 16 and 24 and k over 1, 3, 5 with m=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24366
// Sweeps n over 16 and 24, k over 1, 3, 5 and m over 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24385
// Sweeps k over 1, 3, 5, n over 1..8 and m over 1..4 with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24405
// Single run with m=4, n=8, k=1 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24418
// Single run with m=4, n=8, k=1 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24431
// Single run with m=4, n=8, k=1 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24444 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24445
24446
24447 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=4, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24459
// Single run with m=4, n=8, k=4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24472
// Single run with m=4, n=8, k=4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
24485
// Runs every n in 1..8 against every m in 1..4 at k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24502
// Sweeps m over 1..4 with n=8 and k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24517
// Sweeps n over 1..8 with m=4 and k=4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_val).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24532
// Sweeps k over 1..3 with m=4 and n=8.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24546
// Sweeps k over 1..3 with m=4, n=8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24561
// Sweeps k over 1..3 against every n in 1..8 and m in 1..4, with iterations set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24580
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile with k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24594
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile with k = 5, 6, 7 and
// a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24609
// 4x8 wasmsimd_x86_splat kernel: every (k, n, m) with k in 5..7, n in 1..8
// and m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24628
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile with k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24642
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile with k = 8, 12, ..., 40 and
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(43)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24657
// 4x8 wasmsimd_x86_splat kernel: every (k, n, m) with k = 8, 12, ..., 40,
// n in 1..8 and m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24676
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in 9..15, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24692
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24709
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24726
// 4x8 wasmsimd_x86_splat kernel: every (n, k, m) with n in 9..15,
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24745
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24761
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24778
// 4x8 wasmsimd_x86_splat kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24795
// 4x8 wasmsimd_x86_splat kernel: every (n, k, m) with n in {16, 24},
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24814
// 4x8 wasmsimd_x86_splat kernel: every (k, n, m) with k in {1, 6, 11, 16},
// n in 1..8 and m in 1..4, with cm_stride set to 11 and one tester
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24834
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile, k = 4, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
24847
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile, k = 4, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
24860
// 4x8 wasmsimd_x86_splat kernel: full 4x8 tile, k = 4, with cm_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
24873 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24874
24875
24876 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
24888
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
24901
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile, k = 4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
24914
// 4x8s4 wasmsimd_arm kernel: every (n, m) with n in 1..8 and m in 1..4 at
// k = 4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24931
// 4x8s4 wasmsimd_arm kernel: m swept over 1..4 with n = 8 and k = 4,
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24946
// 4x8s4 wasmsimd_arm kernel: n swept over 1..8 with m = 4 and k = 4,
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(cur_n).k(4)
      .iterations(1)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24961
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24975
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 1, 2, 3 and a_stride
// set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(7)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
24990
// 4x8s4 wasmsimd_arm kernel: every (k, n, m) with k in 1..3, n in 1..8 and
// m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25009
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25023
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 5, 6, 7 and a_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25038
// 4x8s4 wasmsimd_arm kernel: every (k, n, m) with k in 5..7, n in 1..8 and
// m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25057
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25071
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile with k = 8, 12, ..., 40 and
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(43)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25086
// 4x8s4 wasmsimd_arm kernel: every (k, n, m) with k = 8, 12, ..., 40,
// n in 1..8 and m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25105
// 4x8s4 wasmsimd_arm kernel: m = 4, n in 9..15, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25121
// 4x8s4 wasmsimd_arm kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25138
// 4x8s4 wasmsimd_arm kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25155
// 4x8s4 wasmsimd_arm kernel: every (n, k, m) with n in 9..15,
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25174
// 4x8s4 wasmsimd_arm kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25190
// 4x8s4 wasmsimd_arm kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25207
// 4x8s4 wasmsimd_arm kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25224
// 4x8s4 wasmsimd_arm kernel: every (n, k, m) with n in {16, 24},
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25243
// 4x8s4 wasmsimd_arm kernel: every (k, n, m) with k in {1, 6, 11, 16},
// n in 1..8 and m in 1..4, with cm_stride set to 11 and one tester
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25263
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile, k = 4, with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
25276
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile, k = 4, with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
25289
// 4x8s4 wasmsimd_arm kernel: full 4x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm,
      xnn_init_f32_minmax_wasmsimd_params);
}
25302 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25303
25304
25305 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 4.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
      xnn_init_f32_minmax_wasmsimd_params);
}
25317
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
      xnn_init_f32_minmax_wasmsimd_params);
}
25330
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile, k = 4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(
      xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
      xnn_init_f32_minmax_wasmsimd_params);
}
25343
// 4x8s4 wasmsimd_x86 kernel: every (n, m) with n in 1..8 and m in 1..4 at
// k = 4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25360
// 4x8s4 wasmsimd_x86 kernel: m swept over 1..4 with n = 8 and k = 4,
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25375
// 4x8s4 wasmsimd_x86 kernel: n swept over 1..8 with m = 4 and k = 4,
// one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(cur_n).k(4)
      .iterations(1)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25390
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25404
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 1, 2, 3 and a_stride
// set to 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(7)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25419
// 4x8s4 wasmsimd_x86 kernel: every (k, n, m) with k in 1..3, n in 1..8 and
// m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25438
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25452
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 5, 6, 7 and a_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25467
// 4x8s4 wasmsimd_x86 kernel: every (k, n, m) with k in 5..7, n in 1..8 and
// m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25486
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25500
// 4x8s4 wasmsimd_x86 kernel: full 4x8 tile with k = 8, 12, ..., 40 and
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_strided_a) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(cur_k)
      .a_stride(43)
      .Test(
        xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
25515
// 4x8s4 wasmsimd_x86 kernel: every (k, n, m) with k = 8, 12, ..., 40,
// n in 1..8 and m in 1..4, one tester iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25534
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25550
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25567
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in 9..15, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25584
// 4x8s4 wasmsimd_x86 kernel: every (n, k, m) with n in 9..15,
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25603
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25619
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25636
// 4x8s4 wasmsimd_x86 kernel: m = 4, n in {16, 24}, k in {1, 6, 11, 16},
// with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_a) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(23)
        .Test(
          xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25653
// 4x8s4 wasmsimd_x86 kernel: every (n, k, m) with n in {16, 24},
// k in {1, 6, 11, 16} and m in 1..4, one tester iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25672
// 4x8s4 wasmsimd_x86 kernel: every (k, n, m) with k in {1, 6, 11, 16},
// n in 1..8 and m in 1..4, with cm_stride set to 11 and one tester
// iteration per configuration.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25692
// Single configuration: m=4, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4);
  tester.m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25705
// Single configuration: m=4, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4);
  tester.m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25718
// Single configuration: m=4, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(4);
  tester.m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25731 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25732
25733
25734 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=5, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25746
// Single configuration: m=5, n=8, k=1 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25759
// Single configuration: m=5, n=8, k=1 with a_stride(3).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25772
// Sweeps n over 1..8 and m over 1..5 at k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25789
// Sweeps m over 1..5 at n=8, k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25804
// Sweeps n over 1..8 at m=5, k=1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25819
// Sweeps k over 2..9 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25833
// Sweeps k over 2..9 at m=5, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25848
// Sweeps k over 2..9, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25867
// Sweeps n over 9..15 and k over {1, 3, 5} at m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25883
// Sweeps n over 9..15 and k over {1, 3, 5} at m=5 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25900
// Sweeps n over 9..15 and k over {1, 3, 5} at m=5 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25917
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25936
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25952
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=5 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25969
// Sweeps n over {16, 24} and k over {1, 3, 5} at m=5 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25986
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26005
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..5 with cm_stride(11),
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26025
// Single configuration: m=5, n=8, k=1 with qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26038
// Single configuration: m=5, n=8, k=1 with qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26051
// Single configuration: m=5, n=8, k=1 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
26064 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26065
26066
26067 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=5, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26079
// Single configuration: m=5, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26092
// Single configuration: m=5, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26105
// Sweeps n over 1..8 and m over 1..5 at k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26122
// Sweeps m over 1..5 at n=8, k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26137
// Sweeps n over 1..8 at m=5, k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(n_dim).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26152
// Sweeps k over 1..3 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26166
// Sweeps k over 1..3 at m=5, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26181
// Sweeps k over 1..3, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26200
// Sweeps k over 5..7 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26214
// Sweeps k over 5..7 at m=5, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26229
// Sweeps k over 5..7, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26248
// Sweeps k over 8, 12, ..., 40 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26262
// Sweeps k over 8, 12, ..., 40 at m=5, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26277
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26296
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26312
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26329
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26346
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26365
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26381
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=5 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26398
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} at m=5 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26415
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26434
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..5 with
// cm_stride(11), one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26454
// Single configuration: m=5, n=8, k=4 with qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26467
// Single configuration: m=5, n=8, k=4 with qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26480
// Single configuration: m=5, n=8, k=4 with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26493 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26494
26495
26496 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=5, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26508
// Single configuration: m=5, n=8, k=4 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26521
// Single configuration: m=5, n=8, k=4 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26534
// Sweeps n over 1..8 and m over 1..5 at k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26551
// Sweeps m over 1..5 at n=8, k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(m_dim).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26566
// Sweeps n over 1..8 at m=5, k=4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(n_dim).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26581
// Sweeps k over 1..3 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26595
// Sweeps k over 1..3 at m=5, n=8 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26610
// Sweeps k over 1..3, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26629
// Sweeps k over 5..7 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26643
// Sweeps k over 5..7 at m=5, n=8 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26658
// Sweeps k over 5..7, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26677
// Sweeps k over 8, 12, ..., 40 at m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26691
// Sweeps k over 8, 12, ..., 40 at m=5, n=8 with a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);
    tester.m(5).n(8).k(k_dim).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26706
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..5,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26725
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26741
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5 with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26758
// Sweeps n over 9..15 and k over {1, 6, 11, 16} at m=5 with a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(n_dim).k(k_dim).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26775
// Runs n = 9..15, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26794
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26810
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26827
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);
      tester.m(5).n(nc).k(kc).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26844
// Runs n = 16 and 24, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26863
// Runs k = 1, 6, 11, 16 over every m in [1, 5] and n in [1, 8] with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26883
// Single run with m=5, n=8, k=4 and qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26896
// Single run with m=5, n=8, k=4 and qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26909
// Single run with m=5, n=8, k=4 and cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);
  tester.m(5).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26922 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26923
26924
26925 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=5, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
26937
// Single run with m=5, n=8, k=4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
26950
// Single run with m=5, n=8, k=4 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
26963
// Runs k=4 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26980
// Runs k=4, n=8 over every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(mc).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26995
// Runs k=4, m=5 over every n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27010
// Sweeps k = 1..3 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27024
// Sweeps k = 1..3 with m=5, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27039
// Sweeps k = 1..3 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27058
// Sweeps k = 5..7 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27072
// Sweeps k = 5..7 with m=5, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27087
// Sweeps k = 5..7 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27106
// Sweeps k = 8, 12, ..., 40 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27120
// Sweeps k = 8, 12, ..., 40 with m=5, n=8 and a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27135
// Sweeps k = 8, 12, ..., 40 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27154
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27170
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27187
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27204
// Runs n = 9..15, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27223
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27239
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27256
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27273
// Runs n = 16 and 24, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27292
// Runs k = 1, 6, 11, 16 over every m in [1, 5] and n in [1, 8] with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27312
// Single run with m=5, n=8, k=4 and qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
27325
// Single run with m=5, n=8, k=4 and qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
27338
// Single run with m=5, n=8, k=4 and cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
27351 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27352
27353
27354 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=5, n=8, k=4.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27366
// Single run with m=5, n=8, k=4 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27379
// Single run with m=5, n=8, k=4 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27392
// Runs k=4 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27409
// Runs k=4, n=8 over every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(mc).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27424
// Runs k=4, m=5 over every n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27439
// Sweeps k = 1..3 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27453
// Sweeps k = 1..3 with m=5, n=8 and a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27468
// Sweeps k = 1..3 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27487
// Sweeps k = 5..7 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27501
// Sweeps k = 5..7 with m=5, n=8 and a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27516
// Sweeps k = 5..7 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27535
// Sweeps k = 8, 12, ..., 40 with m=5, n=8.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27549
// Sweeps k = 8, 12, ..., 40 with m=5, n=8 and a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);
    tester.m(5).n(8).k(kc).a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27564
// Sweeps k = 8, 12, ..., 40 over every m in [1, 5] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27583
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27599
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27616
// Runs n = 9..15 against k = 1, 6, 11, 16 with m=5 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27633
// Runs n = 9..15, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27652
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27668
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27685
// Runs n = 16 and 24 against k = 1, 6, 11, 16 with m=5 and a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);
      tester.m(5).n(nc).k(kc).a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27702
// Runs n = 16 and 24, k = 1, 6, 11, 16 and every m in [1, 5], one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27721
// Runs k = 1, 6, 11, 16 over every m in [1, 5] and n in [1, 8] with cm_stride(11), one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27741
// Single run with m=5, n=8, k=4 and qmin(128).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27754
// Single run with m=5, n=8, k=4 and qmax(128).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27767
// Single run with m=5, n=8, k=4 and cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);
  tester.m(5).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
27780 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27781
27782
27783 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=6, n=8, k=1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
27795
// Single run with m=6, n=8, k=1 and cn_stride(11).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
27808
// Single run with m=6, n=8, k=1 and a_stride(3).
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
27821
// Runs k=1 over every m in [1, 6] and n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1).iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27838
// Runs k=1, n=8 over every m in [1, 6], one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27853
// Runs k=1, m=6 over every n in [1, 8], one iteration each.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nc).k(1).iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27868
// Sweeps k = 2..9 with m=6, n=8.
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27882
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
  // Full 6x8 tile for each k in [2, 9] with a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27897
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  // Every (m, n) with m in [1, 6], n in [1, 8] for each k in [2, 9], one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27916
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  // n in [9, 15] (larger than nr) with m == 6 and k in {1, 3, 5}.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27932
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  // n in [9, 15] with m == 6, k in {1, 3, 5} and cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27949
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
  // n in [9, 15] with m == 6, k in {1, 3, 5} and a_stride set to 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27966
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  // n in [9, 15], k in {1, 3, 5} and m in [1, 6], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27985
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  // n in {16, 24} (multiples of nr) with m == 6 and k in {1, 3, 5}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28001
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24} with m == 6, k in {1, 3, 5} and cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28018
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24} with m == 6, k in {1, 3, 5} and a_stride set to 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28035
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 3, 5} and m in [1, 6], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28054
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}, n in [1, 8], m in [1, 6] with cm_stride set to 11, one iteration each.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28074
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  // Full 6x8 tile at k == 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28087
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  // Full 6x8 tile at k == 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28100
TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  // Full 6x8 tile at k == 1 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28113 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28114
28115
28116 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  // Full 1x8 tile (m == mr, n == nr) with k == 1.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28128
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  // Full 1x8 tile at k == 1 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28141
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  // Full 1x8 tile at k == 1 with a_stride set to 3.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28154
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  // n in [1, 8] and m in [1, 1] (a single m value) at k == 1, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28171
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // m in [1, 1] (a single value) with n == 8 and k == 1, one iteration.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28186
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // Sweeps n over [1, 8] while m stays at 1 and k at 1, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28201
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // Full 1x8 tile for each k in [2, 9].
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28215
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  // Full 1x8 tile for each k in [2, 9] with a_stride set to 11.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28230
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // k in [2, 9], n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28249
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // n in [9, 15] (larger than nr) with m == 1 and k in {1, 3, 5}.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28265
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // n in [9, 15] with m == 1, k in {1, 3, 5} and cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28282
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  // n in [9, 15] with m == 1, k in {1, 3, 5} and a_stride set to 7.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28299
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // n in [9, 15], k in {1, 3, 5} and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28318
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // n in {16, 24} (multiples of nr) with m == 1 and k in {1, 3, 5}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28334
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24} with m == 1, k in {1, 3, 5} and cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28351
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  // n in {16, 24} with m == 1, k in {1, 3, 5} and a_stride set to 7.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28368
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 3, 5} and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28387
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}, n in [1, 8], m in [1, 1] with cm_stride set to 11, one iteration each.
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28407
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  // Full 1x8 tile at k == 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28420
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  // Full 1x8 tile at k == 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28433
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Full 1x8 tile at k == 1 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
28446 #endif // XNN_ARCH_WASMRELAXEDSIMD
28447
28448
28449 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  // Full 1x8 tile (m == mr, n == nr) with k == 4.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28461
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  // Full 1x8 tile at k == 4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28474
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  // Full 1x8 tile at k == 4 with a_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28487
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  // n in [1, 8] and m in [1, 1] (a single m value) at k == 4, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28504
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  // m in [1, 1] (a single value) with n == 8 and k == 4, one iteration.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28519
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  // Sweeps n over [1, 8] while m stays at 1 and k at 4, one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28534
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  // Full 1x8 tile for each k in [1, 3].
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28548
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  // Full 1x8 tile for each k in [1, 3] with a_stride set to 7.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28563
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  // k in [1, 3], n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28582
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  // Full 1x8 tile for each k in [5, 7].
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28596
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  // Full 1x8 tile for each k in [5, 7] with a_stride set to 11.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28611
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  // k in [5, 7], n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28630
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  // Full 1x8 tile for k = 8, 12, ..., 40 (multiples of 4).
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28644
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  // Full 1x8 tile for k = 8, 12, ..., 40 with a_stride set to 43.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28659
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40, n in [1, 8] and m in [1, 1], one iteration each.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28678
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  // n in [9, 15] (larger than nr) with m == 1 and k in {1, 6, 11, 16}.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28694
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  // n in [9, 15] with m == 1, k in {1, 6, 11, 16} and cn_stride set to 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28711
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  // n in [9, 15] with m == 1, k in {1, 6, 11, 16} and a_stride set to 23.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28728
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  // n in [9, 15], k in {1, 6, 11, 16} and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28747
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  // n in {16, 24} (multiples of nr) with m == 1 and k in {1, 6, 11, 16}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28763
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  // n in {16, 24} with m == 1, k in {1, 6, 11, 16} and cn_stride set to 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28780
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  // n in {16, 24} with m == 1, k in {1, 6, 11, 16} and a_stride set to 23.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28797
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 1], one iteration each.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28816
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, n in [1, 8], m in [1, 1] with cm_stride set to 11, one iteration each.
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28836
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  // Full 1x8 tile at k == 4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28849
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  // Full 1x8 tile at k == 4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28862
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  // Full 1x8 tile at k == 4 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28875 #endif // XNN_ARCH_WASMRELAXEDSIMD
28876
28877
28878 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  // Full 1x8 tile (m == mr, n == nr) with k == 4.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28890
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  // Full 1x8 tile at k == 4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28903
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_strided_a) {
  // Full 1x8 tile at k == 4 with a_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
28916
// Sweeps n over 1..8 and m over 1..1 at k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28933
// Sweeps m over 1..1 with n = 8, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28948
// Sweeps n over 1..8 with m = 1, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(cols).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28963
// Sweeps k over 1..3 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28977
// Sweeps k over 1..3 with m = 1, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28992
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29011
// Sweeps k over 5..7 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29025
// Sweeps k over 5..7 with m = 1, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29040
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29059
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29073
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(depth);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29088
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29107
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29123
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29140
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29157
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29176
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29192
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29209
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29226
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29245
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over 1..1 with cm_stride set to 11,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29265
// Single run with m = 1, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29278
// Single run with m = 1, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29291
// Single run with m = 1, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29304 #endif // XNN_ARCH_WASMRELAXEDSIMD
29305
29306
29307 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 1, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29319
// Single run with m = 1, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29332
// Single run with m = 1, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29345
// Sweeps n over 1..8 and m over 1..1 at k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(rows).n(cols).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29362
// Sweeps m over 1..1 with n = 8, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(rows).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29377
// Sweeps n over 1..8 with m = 1, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(cols).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29392
// Sweeps k over 1..3 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29406
// Sweeps k over 1..3 with m = 1, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29421
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29440
// Sweeps k over 5..7 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29454
// Sweeps k over 5..7 with m = 1, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29469
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29488
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29502
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29517
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29536
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29552
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29569
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29586
// Sweeps n over 9..15, k over 1, 6, 11, 16 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29605
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29621
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29638
// Sweeps n over 16, 24 and k over 1, 6, 11, 16 with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29655
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29674
// Sweeps k over 1, 6, 11, 16, n over 1..8 and m over 1..1 with cm_stride set to 11,
// one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29694
// Single run with m = 1, n = 8, k = 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29707
// Single run with m = 1, n = 8, k = 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29720
// Single run with m = 1, n = 8, k = 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
29733 #endif // XNN_ARCH_WASMRELAXEDSIMD
29734
29735
29736 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 1, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
29748
// Single run with m = 1, n = 8, k = 4 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
29761
// Single run with m = 1, n = 8, k = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
29774
// Sweeps n over 1..8 and m over 1..1 at k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(rows).n(cols).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29791
// Sweeps m over 1..1 with n = 8, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(rows).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29806
// Sweeps n over 1..8 with m = 1, k = 4, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(cols).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29821
// Sweeps k over 1..3 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29835
// Sweeps k over 1..3 with m = 1, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29850
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29869
// Sweeps k over 5..7 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29883
// Sweeps k over 5..7 with m = 1, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29898
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29917
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29931
// Sweeps k over 8, 12, ..., 40 with m = 1, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(depth);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29946
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration per configuration.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29965
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29981
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29998
// Sweeps n over 9..15 and k over 1, 6, 11, 16 with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30015
// n in [9, 15] crossed with k in {1, 6, 11, 16} and m in [1, 1];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30034
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 1.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30050
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 1 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30067
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 1 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(n_val).k(k_val);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30084
// n in {16, 24} crossed with k in {1, 6, 11, 16} and m in [1, 1];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30103
// k in {1, 6, 11, 16} crossed with n in [1, 8] and m in [1, 1], with cm_stride set to 11;
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30123
// Single configuration (m = 1, n = 8, k = 4) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
30136
// Single configuration (m = 1, n = 8, k = 4) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
30149
// Single configuration (m = 1, n = 8, k = 4) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
30162 #endif // XNN_ARCH_WASMRELAXEDSIMD
30163
30164
30165 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 3, n = 8, k = 1.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30177
// Single configuration (m = 3, n = 8, k = 1) with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30190
// Single configuration (m = 3, n = 8, k = 1) with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.a_stride(3);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30203
// k = 1 with every n in [1, 8] crossed with every m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30220
// k = 1, n = 8, with m swept over [1, 3]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30235
// k = 1, m = 3, with n swept over [1, 8]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(n_val).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30250
// k swept over [2, 9] with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30264
// k swept over [2, 9] with m = 3, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30279
// k in [2, 9] crossed with n in [1, 8] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30298
// n in [9, 15] crossed with k in {1, 3, 5}, with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30314
// n in [9, 15] crossed with k in {1, 3, 5}, with m = 3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30331
// n in [9, 15] crossed with k in {1, 3, 5}, with m = 3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30348
// n in [9, 15] crossed with k in {1, 3, 5} and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30367
// n in {16, 24} crossed with k in {1, 3, 5}, with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30383
// n in {16, 24} crossed with k in {1, 3, 5}, with m = 3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30400
// n in {16, 24} crossed with k in {1, 3, 5}, with m = 3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30417
// n in {16, 24} crossed with k in {1, 3, 5} and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30436
// k in {1, 3, 5} crossed with n in [1, 8] and m in [1, 3], with cm_stride set to 11;
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30456
// Single configuration (m = 3, n = 8, k = 1) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30469
// Single configuration (m = 3, n = 8, k = 1) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30482
// Single configuration (m = 3, n = 8, k = 1) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30495 #endif // XNN_ARCH_WASMRELAXEDSIMD
30496
30497
30498 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 3, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30510
// Single configuration (m = 3, n = 8, k = 4) with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30523
// Single configuration (m = 3, n = 8, k = 4) with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30536
// k = 4 with every n in [1, 8] crossed with every m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30553
// k = 4, n = 8, with m swept over [1, 3]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30568
// k = 4, m = 3, with n swept over [1, 8]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(n_val).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30583
// k swept over [1, 3] with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30597
// k swept over [1, 3] with m = 3, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30612
// k in [1, 3] crossed with n in [1, 8] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30631
// k swept over [5, 7] with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30645
// k swept over [5, 7] with m = 3, n = 8 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30660
// k in [5, 7] crossed with n in [1, 8] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30679
// Multiples of 4 for k (8, 12, ..., 40) with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30693
// Multiples of 4 for k (8, 12, ..., 40) with m = 3, n = 8 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30708
// Multiples of 4 for k (8, 12, ..., 40), crossed with every n in [1, 8] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30727
// n in [9, 15] crossed with k in {1, 6, 11, 16}, with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30743
// n in [9, 15] crossed with k in {1, 6, 11, 16}, with m = 3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30760
// n in [9, 15] crossed with k in {1, 6, 11, 16}, with m = 3 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30777
// n in [9, 15] crossed with k in {1, 6, 11, 16} and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30796
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30812
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 3 and cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30829
// n in {16, 24} crossed with k in {1, 6, 11, 16}, with m = 3 and a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30846
// n in {16, 24} crossed with k in {1, 6, 11, 16} and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30865
// k in {1, 6, 11, 16} crossed with n in [1, 8] and m in [1, 3], with cm_stride set to 11;
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30885
// Single configuration (m = 3, n = 8, k = 4) with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30898
// Single configuration (m = 3, n = 8, k = 4) with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30911
// Single configuration (m = 3, n = 8, k = 4) with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30924 #endif // XNN_ARCH_WASMRELAXEDSIMD
30925
30926
30927 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 3, n = 8, k = 4.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
30939
// Single configuration (m = 3, n = 8, k = 4) with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
30952
// Single configuration (m = 3, n = 8, k = 4) with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
30965
// k = 4 with every n in [1, 8] crossed with every m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(m_val).n(n_val).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30982
// k = 4, n = 8, with m swept over [1, 3]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(m_val).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30997
// k = 4, m = 3, with n swept over [1, 8]; each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(n_val).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31012
// k swept over [1, 3] with m = 3 and n = 8.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31026
// k swept over [1, 3] with m = 3, n = 8 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_lt_4_strided_a) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val);
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31041
// k in [1, 3] crossed with n in [1, 8] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31060
// Sweeps k = 5..7 on the full m = 3, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31074
// Sweeps k = 5..7 on the full m = 3, n = 8 tile with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31089
// Sweeps k = 5..7, n = 1..8 and m = 1..3 (k outermost, m innermost);
// iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31108
// Sweeps k = 8, 12, ..., 40 (step 4) on the full m = 3, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31122
// Sweeps k = 8, 12, ..., 40 (step 4) on the full m = 3, n = 8 tile with
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31137
// Sweeps k = 8, 12, ..., 40 (step 4), n = 1..8 and m = 1..3 (k outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31156
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31172
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 3 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31189
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 3 and a_stride
// set to 23.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31206
// Sweeps n = 9..15, k = 1, 6, 11, 16 (step 5) and m = 1..3 (n outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31225
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 3.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31241
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 3 and
// cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31258
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 3 and
// a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31275
// Sweeps n = 16, 24 (step 8), k = 1, 6, 11, 16 (step 5) and m = 1..3
// (n outermost, m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31294
// Sweeps k = 1, 6, 11, 16 (step 5), n = 1..8 and m = 1..3 (k outermost,
// m innermost) with cm_stride set to 11; iterations is set to 1.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31314
// Single m = 3, n = 8, k = 4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
31327
// Single m = 3, n = 8, k = 4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
31340
// Single m = 3, n = 8, k = 4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_3X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
31353 #endif // XNN_ARCH_WASMRELAXEDSIMD
31354
31355
31356 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m = 4, n = 8, k = 1 run.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31368
// Single m = 4, n = 8, k = 1 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31381
// Single m = 4, n = 8, k = 1 run with a_stride set to 3.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31394
// Sweeps n = 1..8 (outer) and m = 1..4 (inner) with k = 1; iterations is set
// to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31411
// Sweeps m = 1..4 with n = 8 and k = 1; iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31426
// Sweeps n = 1..8 with m = 4 and k = 1; iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31441
// Sweeps k = 2..9 on the full m = 4, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31455
// Sweeps k = 2..9 on the full m = 4, n = 8 tile with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31470
// Sweeps k = 2..9, n = 1..8 and m = 1..4 (k outermost, m innermost);
// iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31489
// Sweeps n = 9..15 and k = 1, 3, 5 (step 2) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31505
// Sweeps n = 9..15 and k = 1, 3, 5 (step 2) with m = 4 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31522
// Sweeps n = 9..15 and k = 1, 3, 5 (step 2) with m = 4 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31539
// Sweeps n = 9..15, k = 1, 3, 5 (step 2) and m = 1..4 (n outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31558
// Sweeps n = 16, 24 (step 8) and k = 1, 3, 5 (step 2) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31574
// Sweeps n = 16, 24 (step 8) and k = 1, 3, 5 (step 2) with m = 4 and
// cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31591
// Sweeps n = 16, 24 (step 8) and k = 1, 3, 5 (step 2) with m = 4 and
// a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31608
// Sweeps n = 16, 24 (step 8), k = 1, 3, 5 (step 2) and m = 1..4 (n outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31627
// Sweeps k = 1, 3, 5 (step 2), n = 1..8 and m = 1..4 (k outermost,
// m innermost) with cm_stride set to 11; iterations is set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31647
// Single m = 4, n = 8, k = 1 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31660
// Single m = 4, n = 8, k = 1 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31673
// Single m = 4, n = 8, k = 1 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31686 #endif // XNN_ARCH_WASMRELAXEDSIMD
31687
31688
31689 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m = 4, n = 8, k = 4 run.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
31701
// Single m = 4, n = 8, k = 4 run with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
31714
// Single m = 4, n = 8, k = 4 run with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
31727
// Sweeps n = 1..8 (outer) and m = 1..4 (inner) with k = 4; iterations is set
// to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31744
// Sweeps m = 1..4 with n = 8 and k = 4; iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31759
// Sweeps n = 1..8 with m = 4 and k = 4; iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31774
// Sweeps k = 1..3 (below 4) on the full m = 4, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31788
// Sweeps k = 1..3 on the full m = 4, n = 8 tile with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31803
// Sweeps k = 1..3, n = 1..8 and m = 1..4 (k outermost, m innermost);
// iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31822
// Sweeps k = 5..7 on the full m = 4, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31836
// Sweeps k = 5..7 on the full m = 4, n = 8 tile with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31851
// Sweeps k = 5..7, n = 1..8 and m = 1..4 (k outermost, m innermost);
// iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31870
// Sweeps k = 8, 12, ..., 40 (step 4) on the full m = 4, n = 8 tile.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31884
// Sweeps k = 8, 12, ..., 40 (step 4) on the full m = 4, n = 8 tile with
// a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31899
// Sweeps k = 8, 12, ..., 40 (step 4), n = 1..8 and m = 1..4 (k outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31918
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31934
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 4 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31951
// Sweeps n = 9..15 and k = 1, 6, 11, 16 (step 5) with m = 4 and a_stride
// set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31968
// Sweeps n = 9..15, k = 1, 6, 11, 16 (step 5) and m = 1..4 (n outermost,
// m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31987
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 4.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32003
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 4 and
// cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32020
// Sweeps n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5) with m = 4 and
// a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32037
// Sweeps n = 16, 24 (step 8), k = 1, 6, 11, 16 (step 5) and m = 1..4
// (n outermost, m innermost); iterations is set to 1 for each shape.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32056
// Sweeps k = 1, 6, 11, 16 (step 5), n = 1..8 and m = 1..4 (k outermost,
// m innermost) with cm_stride set to 11; iterations is set to 1.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32076
// Single m = 4, n = 8, k = 4 run with qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32089
// Single m = 4, n = 8, k = 4 run with qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32102
// Single m = 4, n = 8, k = 4 run with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32115 #endif // XNN_ARCH_WASMRELAXEDSIMD
32116
32117
32118 #if XNN_ARCH_WASMRELAXEDSIMD
// 4x8s4 kernel on a full tile (m=4, n=8) at k=4; no stride or clamp overrides.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32130
// 4x8s4 kernel on a full tile (m=4, n=8, k=4) with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32143
// 4x8s4 kernel on a full tile (m=4, n=8, k=4) with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32156
// 4x8s4 kernel at k=4 over every n from 1 to 8 and m from 1 to 4; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32173
// 4x8s4 kernel at n=8, k=4 for every m from 1 to 4; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32188
// 4x8s4 kernel at m=4, k=4 for every n from 1 to 8; one iteration each.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32203
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32217
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 1, 2, 3 with a_stride(7).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32232
// 4x8s4 kernel for k = 1, 2, 3 across n from 1 to 8 and m from 1 to 4; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32251
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32265
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 5, 6, 7 with a_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32280
// 4x8s4 kernel for k = 5, 6, 7 across n from 1 to 8 and m from 1 to 4; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32299
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32313
// 4x8s4 kernel on a full tile (m=4, n=8) for k = 8, 12, ..., 40 with
// a_stride(43).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(depth)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32328
// 4x8s4 kernel for k = 8, 12, ..., 40 across n from 1 to 8 and m from 1 to 4;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32347
// 4x8s4 kernel at m=4 for n from 9 to 15 and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32363
// 4x8s4 kernel at m=4 for n from 9 to 15 and k in {1, 6, 11, 16}, with
// cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32380
// 4x8s4 kernel at m=4 for n from 9 to 15 and k in {1, 6, 11, 16}, with
// a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32397
// 4x8s4 kernel for n from 9 to 15, k in {1, 6, 11, 16}, and m from 1 to 4; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32416
// 4x8s4 kernel at m=4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32432
// 4x8s4 kernel at m=4 for n in {16, 24} and k in {1, 6, 11, 16}, with
// cn_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32449
// 4x8s4 kernel at m=4 for n in {16, 24} and k in {1, 6, 11, 16}, with
// a_stride(23).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32466
// 4x8s4 kernel for n in {16, 24}, k in {1, 6, 11, 16}, and m from 1 to 4; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32485
// 4x8s4 kernel with cm_stride(11): k in {1, 6, 11, 16}, n from 1 to 8, m from
// 1 to 4; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32505
// 4x8s4 kernel on a full tile (m=4, n=8, k=4) with qmin(128) set.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32518
// 4x8s4 kernel on a full tile (m=4, n=8, k=4) with qmax(128) set.
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32531
// 4x8s4 kernel on a full tile (m=4, n=8, k=4) with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_4X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmrelaxedsimd,
          xnn_init_f32_minmax_wasmsimd_params);
}
32544 #endif // XNN_ARCH_WASMRELAXEDSIMD
32545
32546
32547 #if XNN_ARCH_WASMRELAXEDSIMD
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8) at k=1; no stride or
// clamp overrides.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32559
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8, k=1) with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32572
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8, k=1) with a_stride(3).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32585
// 5x8 FMA-loadsplat kernel at k=1 over every n from 1 to 8 and m from 1 to 5;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32602
// 5x8 FMA-loadsplat kernel at n=8, k=1 for every m from 1 to 5; one iteration
// each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32617
// 5x8 FMA-loadsplat kernel at m=5, k=1 for every n from 1 to 8; one iteration
// each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32632
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8) for k from 2 to 9.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32646
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8) for k from 2 to 9 with
// a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32661
// 5x8 FMA-loadsplat kernel for k from 2 to 9 across n from 1 to 8 and m from
// 1 to 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32680
// 5x8 FMA-loadsplat kernel at m=5 for n from 9 to 15 and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32696
// 5x8 FMA-loadsplat kernel at m=5 for n from 9 to 15 and k in {1, 3, 5}, with
// cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32713
// 5x8 FMA-loadsplat kernel at m=5 for n from 9 to 15 and k in {1, 3, 5}, with
// a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32730
// 5x8 FMA-loadsplat kernel for n from 9 to 15, k in {1, 3, 5}, and m from 1 to
// 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32749
// 5x8 FMA-loadsplat kernel at m=5 for n in {16, 24} and k in {1, 3, 5}.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32765
// 5x8 FMA-loadsplat kernel at m=5 for n in {16, 24} and k in {1, 3, 5}, with
// cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32782
// 5x8 FMA-loadsplat kernel at m=5 for n in {16, 24} and k in {1, 3, 5}, with
// a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32799
// 5x8 FMA-loadsplat kernel for n in {16, 24}, k in {1, 3, 5}, and m from 1 to
// 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32818
// 5x8 FMA-loadsplat kernel with cm_stride(11): k in {1, 3, 5}, n from 1 to 8,
// m from 1 to 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32838
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8, k=1) with qmin(128) set.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32851
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8, k=1) with qmax(128) set.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32864
// 5x8 FMA-loadsplat kernel on a full tile (m=5, n=8, k=1) with cm_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32877 #endif // XNN_ARCH_WASMRELAXEDSIMD
32878
32879
32880 #if XNN_ARCH_WASMRELAXEDSIMD
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) at k=4; no stride or clamp
// overrides.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32892
// 5x8 FMA-splat kernel on a full tile (m=5, n=8, k=4) with cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32905
// 5x8 FMA-splat kernel on a full tile (m=5, n=8, k=4) with a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
          xnn_init_f32_minmax_wasmsimd_params);
}
32918
// 5x8 FMA-splat kernel at k=4 over every n from 1 to 8 and m from 1 to 5; one
// iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32935
// 5x8 FMA-splat kernel at n=8, k=4 for every m from 1 to 5; one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32950
// 5x8 FMA-splat kernel at m=5, k=4 for every n from 1 to 8; one iteration each.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32965
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32979
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 1, 2, 3 with
// a_stride(7).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
32994
// 5x8 FMA-splat kernel for k = 1, 2, 3 across n from 1 to 8 and m from 1 to 5;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33013
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33027
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 5, 6, 7 with
// a_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33042
// 5x8 FMA-splat kernel for k = 5, 6, 7 across n from 1 to 8 and m from 1 to 5;
// one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33061
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 8, 12, ..., 40.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33075
// 5x8 FMA-splat kernel on a full tile (m=5, n=8) for k = 8, 12, ..., 40 with
// a_stride(43).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
  }
}
33090
// 5x8 FMA-splat kernel for k = 8, 12, ..., 40 across n from 1 to 8 and m from
// 1 to 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33109
// 5x8 FMA-splat kernel at m=5 for n from 9 to 15 and k in {1, 6, 11, 16}.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33125
// 5x8 FMA-splat kernel at m=5 for n from 9 to 15 and k in {1, 6, 11, 16}, with
// cn_stride(11).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33142
// 5x8 FMA-splat kernel at m=5 for n from 9 to 15 and k in {1, 6, 11, 16}, with
// a_stride(23).
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33159
// 5x8 FMA-splat kernel for n from 9 to 15, k in {1, 6, 11, 16}, and m from 1
// to 5; one iteration per combination.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33178
// Full 5-row tile with n in {16, 24} and k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33194
// n in {16, 24} and k over 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33211
// n in {16, 24} and k over 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33228
// Every m in 1..5 for n in {16, 24} and k in 1, 6, 11, 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33247
// Every m in 1..5 and n in 1..8 for k in 1, 6, 11, 16, with cm_stride set to 11;
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33267
// Full 5x8 tile, k=4, with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33280
// Full 5x8 tile, k=4, with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33293
// Full 5x8 tile, k=4, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33306 #endif // XNN_ARCH_WASMRELAXEDSIMD
33307
33308
33309 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile with k=4.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33321
// Full 5x8 tile, k=4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33334
// Full 5x8 tile, k=4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33347
// Every (m, n) with m in 1..5 and n in 1..8 at k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(rows).n(cols).k(4);     // problem shape for this iteration
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33364
// m swept over 1..5 with n=8 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(rows).n(8).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33379
// n swept over 1..8 with m=5 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(cols).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33394
// Full 5x8 tile with k swept over 1..3.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33408
// Full 5x8 tile with k swept over 1..3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33423
// Every m in 1..5 and n in 1..8 for k in 1..3; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33442
// Full 5x8 tile with k swept over 5..7.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33456
// Full 5x8 tile with k swept over 5..7 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33471
// Every m in 1..5 and n in 1..8 for k in 5..7; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33490
// Full 5x8 tile with k stepping from 8 to 40 in increments of 4.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33504
// Full 5x8 tile with k stepping from 8 to 40 by 4 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33519
// Every m in 1..5 and n in 1..8 for k stepping from 8 to 40 by 4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33538
// Full 5-row tile with n swept over 9..15 and k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33554
// n swept over 9..15 and k over 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33571
// n swept over 9..15 and k over 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33588
// Every m in 1..5 for n in 9..15 and k in 1, 6, 11, 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33607
// Full 5-row tile with n in {16, 24} and k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33623
// n in {16, 24} and k over 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33640
// n in {16, 24} and k over 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33657
// Every m in 1..5 for n in {16, 24} and k in 1, 6, 11, 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33676
// Every m in 1..5 and n in 1..8 for k in 1, 6, 11, 16, with cm_stride set to 11;
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(1);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33696
// Full 5x8 tile, k=4, with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33709
// Full 5x8 tile, k=4, with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33722
// Full 5x8 tile, k=4, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(1);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
33735 #endif // XNN_ARCH_WASMRELAXEDSIMD
33736
33737
33738 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile with k=4 (tester configured with sr=4).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
33750
// Full 5x8 tile, k=4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
33763
// Full 5x8 tile, k=4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
33776
// Every (m, n) with m in 1..5 and n in 1..8 at k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(rows).n(cols).k(4);     // problem shape for this iteration
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33793
// m swept over 1..5 with n=8 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(rows).n(8).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33808
// n swept over 1..8 with m=5 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(cols).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33823
// Full 5x8 tile with k swept over 1..3.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33837
// Full 5x8 tile with k swept over 1..3 and a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_lt_4_strided_a) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(7);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33852
// Every m in 1..5 and n in 1..8 for k in 1..3; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33871
// Full 5x8 tile with k swept over 5..7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33885
// Full 5x8 tile with k swept over 5..7 and a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_gt_4_strided_a) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(11);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33900
// Every m in 1..5 and n in 1..8 for k in 5..7; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33919
// Full 5x8 tile with k stepping from 8 to 40 in increments of 4.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33933
// Full 5x8 tile with k stepping from 8 to 40 by 4 and a_stride set to 43.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_div_4_strided_a) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(8).k(depth);       // problem shape for this iteration
    tester.a_stride(43);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
33948
// Every m in 1..5 and n in 1..8 for k stepping from 8 to 40 by 4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33967
// Full 5-row tile with n swept over 9..15 and k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33983
// n swept over 9..15 and k over 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34000
// n swept over 9..15 and k over 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34017
// Every m in 1..5 for n in 9..15 and k in 1, 6, 11, 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34036
// Full 5-row tile with n in {16, 24} and k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34052
// n in {16, 24} and k over 1, 6, 11, 16, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.cn_stride(11);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34069
// n in {16, 24} and k over 1, 6, 11, 16, with a_stride set to 23.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(5).n(cols).k(depth);    // problem shape for this iteration
      tester.a_stride(23);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34086
// Every m in 1..5 for n in {16, 24} and k in 1, 6, 11, 16; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34105
// Every m in 1..5 and n in 1..8 for k in 1, 6, 11, 16, with cm_stride set to 11;
// one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(5).nr(8).kr(1).sr(4);   // kernel tile geometry
        tester.m(rows).n(cols).k(depth);  // problem shape for this iteration
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34125
// Full 5x8 tile, k=4, with the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
34138
// Full 5x8 tile, k=4, with the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
34151
// Full 5x8 tile, k=4, with cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cm_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
34164 #endif // XNN_ARCH_WASMRELAXEDSIMD
34165
34166
34167 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile with k=4 (tester configured with sr=4).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
34179
// Full 5x8 tile, k=4, with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.cn_stride(11);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
34192
// Full 5x8 tile, k=4, with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
  tester.m(5).n(8).k(4);           // problem shape
  tester.a_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
34205
// Every (m, n) with m in 1..5 and n in 1..8 at k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
      tester.m(rows).n(cols).k(4);     // problem shape for this iteration
      tester.iterations(1);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34222
// m swept over 1..5 with n=8 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(rows).n(8).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34237
// n swept over 1..8 with m=5 and k=4; one iteration per shape.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(5).nr(8).kr(1).sr(4);  // kernel tile geometry
    tester.m(5).n(cols).k(4);        // problem shape for this iteration
    tester.iterations(1);
    tester.Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34252
// m == mr and n == nr; sweeps k over 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34266
// m == mr and n == nr; sweeps k over 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34281
// Sweeps k over 1, 2, 3 and, for each k, every (n, m) pair with n in [1, 8]
// and m in [1, 5]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34300
// m == mr and n == nr; sweeps k over 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34314
// m == mr and n == nr; sweeps k over 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34329
// Sweeps k over 5, 6, 7 and, for each k, every (n, m) pair with n in [1, 8]
// and m in [1, 5]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34348
// m == mr and n == nr; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34362
// m == mr and n == nr; sweeps k over 8, 12, ..., 40 (step 4) with a_stride
// set to 43.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34377
// Sweeps k over 8, 12, ..., 40 (step 4) and, for each k, every (n, m) pair
// with n in [1, 8] and m in [1, 5]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34396
// m == mr; sweeps n over [9, 15] and, for each n, k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34412
// m == mr; sweeps n over [9, 15] and k over 1, 6, 11, 16 with cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34429
// m == mr; sweeps n over [9, 15] and k over 1, 6, 11, 16 with a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34446
// Sweeps n over [9, 15], k over 1, 6, 11, 16 and m over [1, 5] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34465
// m == mr; sweeps n over 16, 24 and, for each n, k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34481
// m == mr; sweeps n over 16, 24 and k over 1, 6, 11, 16 with cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34498
// m == mr; sweeps n over 16, 24 and k over 1, 6, 11, 16 with a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34515
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over [1, 5] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34534
// Sweeps k over 1, 6, 11, 16, n over [1, 8] and m over [1, 5] (innermost)
// with cm_stride set to 11; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34554
// Runs the 5x8s4 kernel with m == mr, n == nr, k == 4 and qmin set to 128.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
34567
// Runs the 5x8s4 kernel with m == mr, n == nr, k == 4 and qmax set to 128.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
34580
// Runs the 5x8s4 kernel with m == mr, n == nr, k == 4 and cm_stride set to 11.
TEST(F32_GEMMINC_MINMAX_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
}
34593 #endif // XNN_ARCH_WASMRELAXEDSIMD
34594
34595
34596 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 6x8 loadsplat kernel with m == mr, n == nr and k == 1.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34608
// Runs the 6x8 loadsplat kernel with m == mr, n == nr, k == 1 and cn_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34621
// Runs the 6x8 loadsplat kernel with m == mr, n == nr, k == 1 and a_stride
// set to 3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34634
// k == 1; visits every (n, m) pair with n in [1, 8] and m in [1, 6],
// each configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34651
// k == 1 and n == nr; sweeps m over [1, 6] with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34666
// k == 1 and m == mr; sweeps n over [1, 8] with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34681
// m == mr and n == nr; sweeps k over [2, 9].
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34695
// m == mr and n == nr; sweeps k over [2, 9] with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34710
// Sweeps k over [2, 9] and, for each k, every (n, m) pair with n in [1, 8]
// and m in [1, 6]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34729
// m == mr; sweeps n over [9, 15] and, for each n, k over 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34745
// m == mr; sweeps n over [9, 15] and k over 1, 3, 5 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34762
// m == mr; sweeps n over [9, 15] and k over 1, 3, 5 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34779
// Sweeps n over [9, 15], k over 1, 3, 5 and m over [1, 6] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34798
// m == mr; sweeps n over 16, 24 and, for each n, k over 1, 3, 5.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34814
// m == mr; sweeps n over 16, 24 and k over 1, 3, 5 with cn_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34831
// m == mr; sweeps n over 16, 24 and k over 1, 3, 5 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(7)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34848
// Sweeps n over 16, 24, k over 1, 3, 5 and m over [1, 6] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34867
// Sweeps k over 1, 3, 5, n over [1, 8] and m over [1, 6] (innermost) with
// cm_stride set to 11; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34887
// Runs the 6x8 loadsplat kernel with m == mr, n == nr, k == 1 and qmin set
// to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34900
// Runs the 6x8 loadsplat kernel with m == mr, n == nr, k == 1 and qmax set
// to 128.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34913
// Runs the 6x8 loadsplat kernel with m == mr, n == nr, k == 1 and cm_stride
// set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34926 #endif // XNN_ARCH_WASMRELAXEDSIMD
34927
34928
34929 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 6x8 splat kernel with m == mr, n == nr and k == 4.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34941
// Runs the 6x8 splat kernel with m == mr, n == nr, k == 4 and cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34954
// Runs the 6x8 splat kernel with m == mr, n == nr, k == 4 and a_stride set
// to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
34967
// k == 4; visits every (n, m) pair with n in [1, 8] and m in [1, 6],
// each configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34984
// k == 4 and n == nr; sweeps m over [1, 6] with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
34999
// k == 4 and m == mr; sweeps n over [1, 8] with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35014
// m == mr and n == nr; sweeps k over 1, 2, 3.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35028
// m == mr and n == nr; sweeps k over 1, 2, 3 with a_stride set to 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_strided_a) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35043
// Sweeps k over 1, 2, 3 and, for each k, every (n, m) pair with n in [1, 8]
// and m in [1, 6]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35062
// m == mr and n == nr; sweeps k over 5, 6, 7.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35076
// m == mr and n == nr; sweeps k over 5, 6, 7 with a_stride set to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_strided_a) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35091
// Sweeps k over 5, 6, 7 and, for each k, every (n, m) pair with n in [1, 8]
// and m in [1, 6]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35110
// m == mr and n == nr; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35124
// m == mr and n == nr; sweeps k over 8, 12, ..., 40 (step 4) with a_stride
// set to 43.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_strided_a) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .a_stride(43)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
35139
// Sweeps k over 8, 12, ..., 40 (step 4) and, for each k, every (n, m) pair
// with n in [1, 8] and m in [1, 6]; each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35158
// m == mr; sweeps n over [9, 15] and, for each n, k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35174
// m == mr; sweeps n over [9, 15] and k over 1, 6, 11, 16 with cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35191
// m == mr; sweeps n over [9, 15] and k over 1, 6, 11, 16 with a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35208
// Sweeps n over [9, 15], k over 1, 6, 11, 16 and m over [1, 6] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35227
// m == mr; sweeps n over 16, 24 and, for each n, k over 1, 6, 11, 16.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35243
// m == mr; sweeps n over 16, 24 and k over 1, 6, 11, 16 with cn_stride set
// to 11.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35260
// m == mr; sweeps n over 16, 24 and k over 1, 6, 11, 16 with a_stride set
// to 23.
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .a_stride(23)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35277
// Sweeps n over 16, 24, k over 1, 6, 11, 16 and m over [1, 6] (innermost);
// each run is configured with iterations(1).
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35296
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, every n in 1..8 and m in 1..6, with cm_stride overridden to 11.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35316
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  // One run at m = 6, n = 8, k = 4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35329
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  // One run at m = 6, n = 8, k = 4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35342
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  // One run at m = 6, n = 8, k = 4 with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35355 #endif // XNN_ARCH_WASMRELAXEDSIMD
35356
35357
35358 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  // One run at m = 6, n = 8 (equal to mr and nr) with k = 4.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35370
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  // One run at m = 6, n = 8, k = 4 with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35383
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_strided_a) {
  // One run at m = 6, n = 8, k = 4 with a_stride overridden to 7.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35396
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  // k fixed at 4; every n in 1..8 paired with every m in 1..6, one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35413
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  // n = 8 and k = 4 stay fixed while m runs from 1 to 6; one iteration each.
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35428
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  // m = 6 and k = 4 stay fixed while n runs from 1 to 8; one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35443
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  // k runs over 1, 2, 3 with m = 6 and n = 8.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35457
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_strided_a) {
  // k runs over 1, 2, 3 with m = 6, n = 8 and a_stride overridden to 7.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35472
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  // k in 1..3, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 1; depth <= 3; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35491
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  // k runs over 5, 6, 7 with m = 6 and n = 8.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35505
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_strided_a) {
  // k runs over 5, 6, 7 with m = 6, n = 8 and a_stride overridden to 11.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35520
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  // k in 5..7, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 5; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35539
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  // k takes the multiples of 4 from 8 through 40, with m = 6 and n = 8.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35553
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_div_4_strided_a) {
  // k takes the multiples of 4 from 8 through 40, with a_stride overridden to 43.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35568
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  // k in {8, 12, ..., 40}, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35587
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  // n runs from 9 to 15; for each, k steps through 1, 6, 11, 16 with m = 6.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35603
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  // n in 9..15, k in {1, 6, 11, 16}, m = 6, with cn_stride overridden to 11.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35620
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_a) {
  // n in 9..15, k in {1, 6, 11, 16}, m = 6, with a_stride overridden to 23.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35637
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration per combination.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35656
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  // n takes the values 16 and 24; for each, k steps through 1, 6, 11, 16 with m = 6.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35672
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 6, with cn_stride overridden to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35689
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_a) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 6, with a_stride overridden to 23.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35706
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration per combination.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35725
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, every n in 1..8 and m in 1..6, with cm_stride overridden to 11.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35745
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  // One run at m = 6, n = 8, k = 4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35758
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  // One run at m = 6, n = 8, k = 4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35771
TEST(F32_GEMMINC_MINMAX_6X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  // One run at m = 6, n = 8, k = 4 with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35784 #endif // XNN_ARCH_WASMRELAXEDSIMD
35785
35786
35787 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4) {
  // One run at m = 6, n = 8 (equal to mr and nr) with k = 4; sr is 4 for this kernel.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35799
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cn) {
  // One run at m = 6, n = 8, k = 4 with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35812
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_strided_a) {
  // One run at m = 6, n = 8, k = 4 with a_stride overridden to 7.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
35825
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile) {
  // k fixed at 4; every n in 1..8 paired with every m in 1..6, one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35842
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_m) {
  // n = 8 and k = 4 stay fixed while m runs from 1 to 6; one iteration each.
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35857
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_eq_4_subtile_n) {
  // m = 6 and k = 4 stay fixed while n runs from 1 to 8; one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35872
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_lt_4) {
  // k runs over 1, 2, 3 with m = 6 and n = 8.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35886
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_lt_4_strided_a) {
  // k runs over 1, 2, 3 with m = 6, n = 8 and a_stride overridden to 7.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35901
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_lt_4_subtile) {
  // k in 1..3, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 1; depth <= 3; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35920
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_gt_4) {
  // k runs over 5, 6, 7 with m = 6 and n = 8.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35934
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_gt_4_strided_a) {
  // k runs over 5, 6, 7 with m = 6, n = 8 and a_stride overridden to 11.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35949
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_gt_4_subtile) {
  // k in 5..7, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 5; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35968
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_div_4) {
  // k takes the multiples of 4 from 8 through 40, with m = 6 and n = 8.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35982
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_div_4_strided_a) {
  // k takes the multiples of 4 from 8 through 40, with a_stride overridden to 43.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35997
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, k_div_4_subtile) {
  // k in {8, 12, ..., 40}, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36016
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8) {
  // n runs from 9 to 15; for each, k steps through 1, 6, 11, 16 with m = 6.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36032
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_strided_cn) {
  // n in 9..15, k in {1, 6, 11, 16}, m = 6, with cn_stride overridden to 11.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36049
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_strided_a) {
  // n in 9..15, k in {1, 6, 11, 16}, m = 6, with a_stride overridden to 23.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36066
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_gt_8_subtile) {
  // n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration per combination.
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36085
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8) {
  // n takes the values 16 and 24; for each, k steps through 1, 6, 11, 16 with m = 6.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36101
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_strided_cn) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 6, with cn_stride overridden to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36118
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_strided_a) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 6, with a_stride overridden to 23.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36135
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration per combination.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36154
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, every n in 1..8 and m in 1..6, with cm_stride overridden to 11.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36174
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, qmin) {
  // One run at m = 6, n = 8, k = 4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36187
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, qmax) {
  // One run at m = 6, n = 8, k = 4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36200
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD, strided_cm) {
  // One run at m = 6, n = 8, k = 4 with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd, xnn_init_f32_minmax_wasmsimd_params);
}
36213 #endif // XNN_ARCH_WASMRELAXEDSIMD
36214
36215
36216 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // One run at m = 6, n = 8 (equal to mr and nr) with k = 4; sr is 4 for this kernel.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36228
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // One run at m = 6, n = 8, k = 4 with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36241
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  // One run at m = 6, n = 8, k = 4 with a_stride overridden to 7.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36254
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // k fixed at 4; every n in 1..8 paired with every m in 1..6, one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36271
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // n = 8 and k = 4 stay fixed while m runs from 1 to 6; one iteration each.
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36286
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // m = 6 and k = 4 stay fixed while n runs from 1 to 8; one iteration each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36301
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // k runs over 1, 2, 3 with m = 6 and n = 8.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36315
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  // k runs over 1, 2, 3 with m = 6, n = 8 and a_stride overridden to 7.
  for (size_t depth = 1; depth <= 3; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .a_stride(7)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36330
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // k in 1..3, n in 1..8, m in 1..6; one iteration per combination.
  for (size_t depth = 1; depth <= 3; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36349
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // k runs over 5, 6, 7 with m = 6 and n = 8.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36363
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  // k runs over 5, 6, 7 with m = 6, n = 8 and a_stride overridden to 11.
  for (size_t depth = 5; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36378
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // Every m in 1..6 and n in 1..8 for k = 5, 6, 7; one iteration per shape.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36397
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // Full 6x8 tile, k = 8, 12, ..., 40 (multiples of 4).
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36411
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  // Full 6x8 tile, k = 8, 12, ..., 40, with a_stride set to 43.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(4)
      .m(6).n(8).k(k)
      .a_stride(43)
      .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36426
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // Every m in 1..6 and n in 1..8 for k = 8, 12, ..., 40; one iteration per shape.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36445
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // m = 6, n = 9..15, k in {1, 6, 11, 16}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36461
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // m = 6, n = 9..15, k in {1, 6, 11, 16}, with cn_stride set to 11.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36478
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  // m = 6, n = 9..15, k in {1, 6, 11, 16}, with a_stride set to 23.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36495
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  // n = 9..15, k in {1, 6, 11, 16}, every m in 1..6; one iteration per shape.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36514
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  // m = 6, n in {16, 24}, k in {1, 6, 11, 16}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36530
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  // m = 6, n in {16, 24}, k in {1, 6, 11, 16}, with cn_stride set to 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36547
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  // m = 6, n in {16, 24}, k in {1, 6, 11, 16}, with a_stride set to 23.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(n).k(k)
        .a_stride(23)
        .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36564
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, every m in 1..6; one iteration per shape.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36583
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, every n in 1..8 and m in 1..6, with cm_stride set to 11.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36603
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  // Full 6x8 tile at k = 4 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36616
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  // Full 6x8 tile at k = 4 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36629
TEST(F32_GEMMINC_MINMAX_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  // Full 6x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
36642 #endif // XNN_ARCH_WASMRELAXEDSIMD
36643
36644
36645 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1) {
  // Full 1x4 tile at k = 1.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36657
TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cn) {
  // Full 1x4 tile at k = 1 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36670
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_strided_a) {
  // Full 1x4 tile at k = 1 with a_stride set to 3.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36683
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile) {
  // k = 1, every n in 1..4 and m in 1..1; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36700
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile_m) {
  // k = 1, n = 4, every m in 1..1; one iteration per shape.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
36715
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile_n) {
  // k = 1, m = 1, every n in 1..4; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
36730
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1) {
  // Full 1x4 tile, k = 2..9.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
36744
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1_strided_a) {
  // Full 1x4 tile, k = 2..9, with a_stride set to 11.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
36759
TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1_subtile) {
  // k = 2..9, every n in 1..4 and m in 1..1; one iteration per shape.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
36778
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4) {
  // m = 1, n = 5..7, k in {1, 3, 5}.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36794
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_strided_cn) {
  // m = 1, n = 5..7, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36811
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_strided_a) {
  // m = 1, n = 5..7, k in {1, 3, 5}, with a_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36828
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_subtile) {
  // n = 5..7, k in {1, 3, 5}, every m in 1..1; one iteration per shape.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
36847
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36863
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_strided_cn) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36880
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_strided_a) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}, with a_stride set to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
36897
TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, every m in 1..1; one iteration per shape.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
36916
TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cm_subtile) {
  // k in {1, 3, 5}, every n in 1..4 and m in 1..1, with cm_stride set to 7.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
36936
TEST(F32_GEMMINC_MINMAX_1X4__WASM, qmin) {
  // Full 1x4 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36949
TEST(F32_GEMMINC_MINMAX_1X4__WASM, qmax) {
  // Full 1x4 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36962
TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cm) {
  // Full 1x4 tile at k = 1 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
36975 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36976
36977
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1) {
  // Full 1x4 tile at k = 1.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
36989
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cn) {
  // Full 1x4 tile at k = 1 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37002
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_strided_a) {
  // Full 1x4 tile at k = 1 with a_stride set to 3.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37015
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
  // k = 1, every n in 1..4 and m in 1..1; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37032
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
  // k = 1, n = 4, every m in 1..1; one iteration per shape.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37047
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
  // k = 1, m = 1, every n in 1..4; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37062
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1) {
  // Full 1x4 tile, k = 2..9.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37076
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1_strided_a) {
  // Full 1x4 tile, k = 2..9, with a_stride set to 11.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37091
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
  // k = 2..9, every n in 1..4 and m in 1..1; one iteration per shape.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37110
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4) {
  // m = 1, n = 5..7, k in {1, 3, 5}.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37126
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
  // m = 1, n = 5..7, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37143
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_strided_a) {
  // m = 1, n = 5..7, k in {1, 3, 5}, with a_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37160
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
  // n = 5..7, k in {1, 3, 5}, every m in 1..1; one iteration per shape.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37179
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37195
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37212
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_strided_a) {
  // m = 1, n in {8, 12}, k in {1, 3, 5}, with a_stride set to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37229
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, every m in 1..1; one iteration per shape.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37248
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cm_subtile) {
  // k in {1, 3, 5}, every n in 1..4 and m in 1..1, with cm_stride set to 7.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37268
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, qmin) {
  // Full 1x4 tile at k = 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37281
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, qmax) {
  // Full 1x4 tile at k = 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37294
TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cm) {
  // Full 1x4 tile at k = 1 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37307
37308
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1) {
  // Full 4x4 tile at k = 1.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37320
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cn) {
  // Full 4x4 tile at k = 1 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37333
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
  // Full 4x4 tile at k = 1 with a_stride set to 3.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37346
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
  // k = 1, every n in 1..4 and m in 1..4; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37363
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
  // k = 1, n = 4, every m in 1..4; one iteration per shape.
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(m).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37378
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
  // k = 1, m = 4, every n in 1..4; one iteration per shape.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37393
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1) {
  // Full 4x4 tile, k = 2..9.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37407
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1_strided_a) {
  // Full 4x4 tile, k = 2..9, with a_stride set to 11.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k)
      .a_stride(11)
      .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37422
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
  // k = 2..9, every n in 1..4 and m in 1..4; one iteration per shape.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37441
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4) {
  // m = 4, n = 5..7, k in {1, 3, 5}.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37457
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
  // m = 4, n = 5..7, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37474
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
  // m = 4, n = 5..7, k in {1, 3, 5}, with a_stride set to 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37491
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
  // n = 5..7, k in {1, 3, 5}, every m in 1..4; one iteration per shape.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37510
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4) {
  // m = 4, n in {8, 12}, k in {1, 3, 5}.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37526
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
  // m = 4, n in {8, 12}, k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37543
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel with m = 4, n = 8 and 12
// and k = 1, 3, 5 while a_stride is set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      // A fresh tester is built for every (n, k) combination.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      // NOTE(review): presumably the element stride between rows of A - confirm
      // against gemm-microkernel-tester.h.
      tester.a_stride(7);
      tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37560
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel over every combination of
// n = 8 and 12, k = 1, 3, 5 and m = 1..4, with the tester's iteration count set
// to 1.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        // A fresh tester is built for every (n, k, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37579
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel over every combination of
// k = 1, 3, 5, n = 1..4 and m = 1..4 with cm_stride set to 7 and the tester's
// iteration count set to 1.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        // A fresh tester is built for every (k, n, m) combination.
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        // NOTE(review): presumably the element stride between rows of C -
        // confirm against gemm-microkernel-tester.h.
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37599
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel once on a full 4x4 tile
// with k = 1 and the tester's qmin set to 128.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  // NOTE(review): presumably raises the lower output clamp bound - confirm how
  // the tester maps qmin onto the float min/max params.
  tester.qmin(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37612
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel once on a full 4x4 tile
// with k = 1 and the tester's qmax set to 128.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  // NOTE(review): presumably lowers the upper output clamp bound - confirm how
  // the tester maps qmax onto the float min/max params.
  tester.qmax(128);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37625
// Runs the 4x4 scalar f32 GEMMINC min/max microkernel once on a full 4x4 tile
// with k = 1 and cm_stride set to 7.
TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  // NOTE(review): presumably the element stride between rows of C - confirm
  // against gemm-microkernel-tester.h.
  tester.cm_stride(7);
  tester.Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37638