// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m=4, n=8) with k=2.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
40
// Full 4x8 tile with k=2 and the tester's cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
54
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=2,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
72
// Sweeps m in [1, 4] with n=8 and k=2, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
88
// Sweeps n in [1, 8] with m=4 and k=2, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
104
// Full 4x8 tile for every k below 2 (the loop visits only k=1).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
119
// Sweeps m in [1, 4] and n in [1, 8] for every k below 2 (only k=1),
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
139
// Full 4x8 tile for k in [3, 4) (the loop visits only k=3).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
154
// Sweeps m in [1, 4] and n in [1, 8] for k in [3, 4) (only k=3),
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
174
// Full 4x8 tile for even k from 4 through 20.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
189
// Sweeps m in [1, 4] and n in [1, 8] for even k from 4 through 20,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
209
// m=4 with n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
226
// m=4 with n in [9, 15] and k in {1, 4, 7, 10}, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
244
// Sweeps m in [1, 4] for n in [9, 15] and k in {1, 4, 7, 10},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
264
// m=4 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
281
// m=4 with n in {16, 24} and k in {1, 4, 7, 10}, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
299
// Sweeps m in [1, 4] for n in {16, 24} and k in {1, 4, 7, 10},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
319
// Full 4x8 tile with ks=3 for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
335
// Sweeps m in [1, 4] and n in [1, 8] with ks=3 for k in {1, 4, 7, 10},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
356
// m=4 with ks=3 for n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
374
// m=4 with ks=3 for n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
392
// Sweeps m in [1, 4] and n in [1, 8] with cm_stride=11 for k in {1, 4, 7, 10},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
413
// Full 4x8 tile with ks=3 and a_offset=43 for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
430
// Full 4x8 tile with ks=3 and a_offset=43, sweeping zero_index over [0, 3]
// for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
450
// Full 4x8 tile with k=2 and the tester's qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
464
// Full 4x8 tile with k=2 and the tester's qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
478
// Full 4x8 tile with k=2 and the tester's cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
492 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
493
494
495 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m=4, n=8) with k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
508
// Full 4x8 tile with k=4 and the tester's cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
522
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=4,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
540
// Sweeps m in [1, 4] with n=8 and k=4, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
556
// Sweeps n in [1, 8] with m=4 and k=4, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
572
// Full 4x8 tile (m=4, n=8) with k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
585
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=8,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
603
// Full 4x8 tile for every k in [1, 7].
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
618
// Sweeps m in [1, 4] and n in [1, 8] for every k in [1, 7],
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
638
// Full 4x8 tile for every k in [9, 15].
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
653
// Sweeps m in [1, 4] and n in [1, 8] for every k in [9, 15],
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
673
// Full 4x8 tile for k from 12 through 40 in steps of 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 12; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
688
// Sweeps m in [1, 4] and n in [1, 8] for k from 12 through 40 in steps of 4,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 12; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
708
// m=4 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
725
// m=4 with n in [9, 15] and k in {1, 6, 11, 16}, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
743
// Sweeps m in [1, 4] for n in [9, 15] and k in {1, 6, 11, 16},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
763
// m=4 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
780
// m=4 with n in {16, 24} and k in {1, 6, 11, 16}, cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
798
// Sweeps m in [1, 4] for n in {16, 24} and k in {1, 6, 11, 16},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
818
// Full 4x8 tile with ks=3 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
834
// Sweeps m in [1, 4] and n in [1, 8] with ks=3 for k in {1, 6, 11, 16},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
855
// m=4 with ks=3 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
873
// m=4 with ks=3 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
891
// Sweeps m in [1, 4] and n in [1, 8] with cm_stride=11 for k in {1, 6, 11, 16},
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
912
// Full 4x8 tile with ks=3 and a_offset=83 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
  }
}
929
// Full 4x8 tile with ks=3 and a_offset=83, sweeping zero_index over [0, 3]
// for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
    }
  }
}
949
// Full 4x8 tile with k=4 and the tester's qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
963
// Full 4x8 tile with k=4 and the tester's qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
977
// Full 4x8 tile with k=4 and the tester's cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
}
991 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
992
993
994 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m=4, n=8) with k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1007
// Full 4x8 tile with k=4 and the tester's cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1021
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=4,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1039
// Sweeps m in [1, 4] with n=8 and k=4, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1055
// Sweeps n in [1, 8] with m=4 and k=4, one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1071
// Full 4x8 tile (m=4, n=8) with k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1084
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=8,
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1102
// Full 4x8 tile for every k in [1, 7].
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1117
// Sweeps m in [1, 4] and n in [1, 8] for every k in [1, 7],
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1137
// Full 4x8 tile for every k in [9, 15].
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1152
// Sweeps m in [1, 4] and n in [1, 8] for every k in [9, 15],
// one tester iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1172
// Full 4x8 tile with k = 12, 16, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1187
// Every (m, n) with m in [1, 4] and n in [1, 8], for k = 12, 16, ..., 40; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1207
// m = 4 with n in [9, 16) and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1224
// m = 4 with n in [9, 16) and k = 1, 6, 11, 16, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1242
// n in [9, 16), k = 1, 6, 11, 16 and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1262
// m = 4 with n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1279
// m = 4 with n = 16, 24 and k = 1, 6, 11, 16, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1297
// n = 16, 24; k = 1, 6, 11, 16; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1317
// Full 4x8 tile with ks = 3 and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1333
// ks = 3 with k = 1, 6, 11, 16, n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1354
// m = 4 and ks = 3 with n in [9, 16) and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1372
// m = 4 and ks = 3 with n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1390
// cm_stride = 11 with k = 1, 6, 11, 16, n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1411
// Full 4x8 tile with ks = 3 and a_offset = 83, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1428
// Full 4x8 tile with ks = 3 and a_offset = 83; zero_index sweeps [0, 4) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1448
// Full 4x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1462
// Full 4x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1476
// Full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1490 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1491
1492
1493 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m == mr, n == nr) at k = 4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1506
// Full 4x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1520
// k = 4 with every (m, n), n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1538
// k = 4 and n = 8 with m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1554
// k = 4 and m = 4 with n in [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1570
// Full 4x8 tile at k = 8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1583
// k = 8 with every (m, n), n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1601
// Full 4x8 tile with k taking every value in [1, 8).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1616
// Every (m, n) with m in [1, 4] and n in [1, 8], for each k in [1, 8); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1636
// Full 4x8 tile with k taking every value in [9, 16).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1651
// Every (m, n) with m in [1, 4] and n in [1, 8], for each k in [9, 16); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1671
// Full 4x8 tile with k = 12, 16, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1686
// Every (m, n) with m in [1, 4] and n in [1, 8], for k = 12, 16, ..., 40; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1706
// m = 4 with n in [9, 16) and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1723
// m = 4 with n in [9, 16) and k = 1, 6, 11, 16, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1741
// n in [9, 16), k = 1, 6, 11, 16 and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1761
// m = 4 with n = 16, 24 (step 8) and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1778
// m = 4 with n = 16, 24 and k = 1, 6, 11, 16, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1796
// n = 16, 24; k = 1, 6, 11, 16; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1816
// Full 4x8 tile with ks = 3 and k = 1, 6, 11, 16 (step 5).
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1832
// ks = 3 with k = 1, 6, 11, 16, n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1853
// m = 4 and ks = 3 with n in [9, 16) and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1871
// m = 4 and ks = 3 with n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1889
// cm_stride = 11 with k = 1, 6, 11, 16, n in [1, 8] and m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
1910
// Full 4x8 tile with ks = 3 and a_offset = 83, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
1927
// Full 4x8 tile with ks = 3 and a_offset = 83; zero_index sweeps [0, 4) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
1947
// Full 4x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1961
// Full 4x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1975
// Full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
1989 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1990
1991
1992 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 1x8 tile (m == mr, n == nr) at k = 8.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2005
// Full 1x8 tile at k = 8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2019
// k = 8 with n in [1, 8] and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2037
// k = 8 and n = 8 with m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2053
// k = 8 and m = 1 with n in [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2069
// Full 1x8 tile at k = 16.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
2082
// k = 16 with n in [1, 8] and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2100
// Full 1x8 tile with k taking every value in [1, 16).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2115
// Each k in [1, 16) with n in [1, 8] and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2135
// Full 1x8 tile with k taking every value in [17, 32).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2150
// Each k in [17, 32) with n in [1, 8] and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2170
// Full 1x8 tile with k = 24, 32, ..., 80 (step 8).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
2185
// k = 24, 32, ..., 80 with n in [1, 8] and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2205
// m = 1 with n in [9, 16) and k = 1, 10, 19, 28, 37 (step 9).
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2222
// m = 1 with n in [9, 16) and k = 1, 10, 19, 28, 37, using cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
2240
// n in [9, 16), k = 1, 10, 19, 28, 37 and m in [1, 1]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2260
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2277
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m = 1 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2295
// Sweeps n = 16, 24, k = 1, 10, 19, 28, 37 and m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2315
// Sweeps k = 1, 10, 19, 28, 37 on a 1x8 problem with ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2331
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1 with ks = 3; one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2352
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2370
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2388
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1 with cm_stride = 11; one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2409
// Sweeps k = 1, 10, 19, 28, 37 on a 1x8 problem with ks = 3 and a_offset = 43.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
  }
}
2426
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0 with ks = 3 and a_offset = 43.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2446
// Single configuration: m = 1, n = 8, k = 8 with qmin = 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2460
// Single configuration: m = 1, n = 8, k = 8 with qmax = 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2474
// Single configuration: m = 1, n = 8, k = 8 with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
}
2488 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2489
2490
2491 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 1, n = 8, k = 8.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2504
// Single configuration: m = 1, n = 8, k = 8 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2518
// Sweeps n = 1..8 and m = 1 at k = 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    for (uint32_t mc = 1; mc < 2; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2536
// Sweeps m = 1 at n = 8, k = 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc < 2; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2552
// Sweeps n = 1..8 at m = 1, k = 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2568
// Single configuration: m = 1, n = 8, k = 16.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2581
// Sweeps n = 1..8 and m = 1 at k = 16; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    for (uint32_t mc = 1; mc < 2; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2599
// Sweeps k = 1..15 on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 15; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2614
// Sweeps k = 1..15, n = 1..8 and m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2634
// Sweeps k = 17..31 on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc <= 31; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2649
// Sweeps k = 17..31, n = 1..8 and m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc <= 31; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2669
// Sweeps k = 24, 32, ..., 80 (step 8) on a 1x8 problem.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2684
// Sweeps k = 24, 32, ..., 80 (step 8), n = 1..8 and m = 1; one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2704
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2721
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m = 1 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2739
// Sweeps n = 9..15, k = 1, 10, 19, 28, 37 and m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2759
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m fixed at 1.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2776
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m = 1 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2794
// Sweeps n = 16, 24, k = 1, 10, 19, 28, 37 and m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2814
// Sweeps k = 1, 10, 19, 28, 37 on a 1x8 problem with ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2830
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1 with ks = 3; one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2851
// Sweeps n = 9..15 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2869
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 with m = 1 and ks = 3.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2887
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1 with cm_stride = 11; one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 2; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
2908
// Sweeps k = 1, 10, 19, 28, 37 on a 1x8 problem with ks = 3 and a_offset = 43.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
  }
}
2925
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0 with ks = 3 and a_offset = 43.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
2945
// Single configuration: m = 1, n = 8, k = 8 with qmin = 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2959
// Single configuration: m = 1, n = 8, k = 8 with qmax = 128.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2973
// Single configuration: m = 1, n = 8, k = 8 with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53,
          xnn_init_f32_minmax_scalar_params);
}
2987 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2988
2989
2990 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single configuration: m = 4, n = 2, k = 2.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3003
// Single configuration: m = 4, n = 2, k = 2 with cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .cn_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
          xnn_init_f32_minmax_scalar_params);
}
3017
// Sweeps n = 1..2 and m = 1..4 at k = 2; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc < 3; ++nc) {
    for (uint32_t mc = 1; mc < 5; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3035
// Sweeps m = 1..4 at n = 2, k = 2; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc < 5; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3051
// Sweeps n = 1..2 at m = 4, k = 2; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc < 3; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(nc).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3067
// Sweeps k below 2 (only k = 1) on a 4x2 problem.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 1; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3082
// Sweeps k = 1, n = 1..2 and m = 1..4; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 1; ++kc) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3102
// Sweeps k in [3, 4) (only k = 3) on a 4x2 problem.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc <= 3; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3117
// Sweeps k = 3, n = 1..2 and m = 1..4; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3137
// Sweeps k = 4, 6, ..., 20 (step 2) on a 4x2 problem.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3152
// Sweeps k = 4, 6, ..., 20 (step 2), n = 1..2 and m = 1..4; one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3172
// Sweeps n = 3 and k = 1, 4, 7, 10 with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3189
// Sweeps n = 3 and k = 1, 4, 7, 10 with m = 4 and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3207
// Sweeps n = 3, k = 1, 4, 7, 10 and m = 1..4; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3227
// Sweeps n = 4, 6 and k = 1, 4, 7, 10 with m fixed at 4.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3244
// Sweeps n = 4, 6 and k = 1, 4, 7, 10 with m = 4 and cn_stride = 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3262
// Sweeps n = 4, 6, k = 1, 4, 7, 10 and m = 1..4; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3282
// Sweeps k = 1, 4, 7, 10 on a 4x2 problem with ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
  }
}
3298
// Sweeps k = 1, 4, 7, 10, n = 1..2 and m = 1..4 with ks = 3; one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc < 3; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3319
// Sweeps n = 3 and k = 1, 4, 7, 10 with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3337
// Sweeps n = 4, 6 and k = 1, 4, 7, 10 with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
    }
  }
}
3355
// With cm_stride=5, covers every (m, n) in [1..4] x [1..2] for k in
// {1, 4, 7, 10}; each configuration is run for a single iteration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3376
// Full 4x2 tile with ks=3 and a_offset=43 for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
3393
// Full 4x2 tile with ks=3 and a_offset=43; zero_index takes each value in
// [0..3] for every k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3413
// Single run of the full 4x2 tile at k=2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
3427
// Single run of the full 4x2 tile at k=2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
3441
// Single run of the full 4x2 tile at k=2 with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(2)
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
3455 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3456
3457
3458 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the full 4x2 tile (m=4, n=2) at k=8.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3471
// Single run of the full 4x2 tile at k=8 with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .cn_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3485
// At k=8, covers every (m, n) in [1..4] x [1..2], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3503
// At k=8 and n=2, varies m over [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3519
// At k=8 and m=4, varies n over [1..2], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3535
// Single run of the full 4x2 tile (m=4, n=2) at k=16.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3548
// At k=16, covers every (m, n) in [1..4] x [1..2], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3566
// Full 4x2 tile for every k in [1..15].
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3581
// For every k in [1..15], covers every (m, n) in [1..4] x [1..2],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3601
// Full 4x2 tile for every k in [17..31].
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3616
// For every k in [17..31], covers every (m, n) in [1..4] x [1..2],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3636
// Full 4x2 tile for k = 24, 32, ..., 80 (multiples of 8).
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3651
// For k = 24, 32, ..., 80, covers every (m, n) in [1..4] x [1..2],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 24; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3671
// n exceeds nr=2 (only n=3 is visited) with m=4 and k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3688
// n exceeds nr=2 (only n=3 is visited) with cn_stride=5, m=4 and
// k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3706
// n exceeds nr=2 (only n=3 is visited); for k in {1, 10, 19, 28, 37}
// varies m over [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3726
// n is a multiple of nr=2 (n in {4, 6}) with m=4 and k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3743
// n is a multiple of nr=2 (n in {4, 6}) with cn_stride=5, m=4 and
// k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3761
// n is a multiple of nr=2 (n in {4, 6}); for k in {1, 10, 19, 28, 37}
// varies m over [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3781
// Full 4x2 tile with ks=3 for k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3797
// With ks=3, covers every (m, n) in [1..4] x [1..2] for k in
// {1, 10, 19, 28, 37}; each configuration is run for a single iteration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3818
// n exceeds nr=2 (only n=3 is visited) with ks=3, m=4 and
// k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3836
// n is a multiple of nr=2 (n in {4, 6}) with ks=3, m=4 and
// k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3854
// With cm_stride=5, covers every (m, n) in [1..4] x [1..2] for k in
// {1, 10, 19, 28, 37}; each configuration is run for a single iteration.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
3875
// Full 4x2 tile with ks=3 and a_offset=163 for k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
3892
// Full 4x2 tile with ks=3 and a_offset=163; zero_index takes each value in
// [0..3] for every k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
3912
// Single run of the full 4x2 tile at k=8 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3926
// Single run of the full 4x2 tile at k=8 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3940
// Single run of the full 4x2 tile at k=8 with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(8)
      .cm_stride(5)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
3954 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3955
3956
3957 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the full 4x8 tile (m=4, n=8) at k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
3970
// Single run of the full 4x8 tile at k=4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
3984
// At k=4, covers every (m, n) in [1..4] x [1..8], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4002
// At k=4 and n=8, varies m over [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4018
// At k=4 and m=4, varies n over [1..8], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4034
// Single run of the full 4x8 tile (m=4, n=8) at k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4047
// At k=8, covers every (m, n) in [1..4] x [1..8], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4065
// Full 4x8 tile for every k in [1..7].
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4080
// For every k in [1..7], covers every (m, n) in [1..4] x [1..8],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4100
// Full 4x8 tile for every k in [9..15].
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4115
// For every k in [9..15], covers every (m, n) in [1..4] x [1..8],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4135
// Full 4x8 tile for k = 12, 16, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4150
// For k = 12, 16, ..., 40, covers every (m, n) in [1..4] x [1..8],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 12; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4170
// n exceeds nr=8 (n in [9..15]) with m=4 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4187
// n exceeds nr=8 (n in [9..15]) with cn_stride=11, m=4 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4205
// n exceeds nr=8 (n in [9..15]); for k in {1, 6, 11, 16} varies m over
// [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4225
// n is a multiple of nr=8 (n in {16, 24}) with m=4 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4242
// n is a multiple of nr=8 (n in {16, 24}) with cn_stride=11, m=4 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4260
// n is a multiple of nr=8 (n in {16, 24}); for k in {1, 6, 11, 16} varies
// m over [1..4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4280
// Full 4x8 tile with ks=3 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4296
// With ks=3, covers every (m, n) in [1..4] x [1..8] for k in {1, 6, 11, 16};
// each configuration is run for a single iteration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4317
// n exceeds nr=8 (n in [9..15]) with ks=3, m=4 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4335
// n is a multiple of nr=8 (n in {16, 24}) with ks=3, m=4 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4353
// With cm_stride=11, covers every (m, n) in [1..4] x [1..8] for k in
// {1, 6, 11, 16}; each configuration is run for a single iteration.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4374
// Full 4x8 tile with ks=3 and a_offset=83 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4391
// Full 4x8 tile with ks=3 and a_offset=83; zero_index takes each value in
// [0..3] for every k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4411
// Single run of the full 4x8 tile at k=4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4425
// Single run of the full 4x8 tile at k=4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4439
// strided_cm: m=4, n=8, k=4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4453 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4454
4455
4456 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_4: single run with m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4469
// strided_cn: m=4, n=8, k=4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4483
// k_eq_4_subtile: k=4, one iteration for every (n, m) with n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4501
// k_eq_4_subtile_m: k=4, n=8, one iteration for each m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4517
// k_eq_4_subtile_n: k=4, m=4, one iteration for each n in 1..8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4533
// k_eq_8: single run with m=4, n=8, k=8.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4546
// k_eq_8_subtile: k=8, one iteration for every (n, m) with n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4564
// k_lt_8: m=4, n=8 for every k in 1..7.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4579
// k_lt_8_subtile: one iteration for every (k, n, m) with k in 1..7,
// n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4599
// k_gt_8: m=4, n=8 for every k in 9..15.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4614
// k_gt_8_subtile: one iteration for every (k, n, m) with k in 9..15,
// n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4634
// k_div_4: m=4, n=8 for k = 12, 16, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4649
// k_div_4_subtile: one iteration for every (k, n, m) with k = 12, 16, ..., 40,
// n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4669
// n_gt_8: m=4 for every n in 9..15, each with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4686
// n_gt_8_strided_cn: m=4, cn_stride=11 for every n in 9..15, each with
// k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4704
// n_gt_8_subtile: one iteration for every (n, k, m) with n in 9..15,
// k = 1, 6, 11, 16 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4724
// n_div_8: m=4 for n = 16 and 24, each with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4741
// n_div_8_strided_cn: m=4, cn_stride=11 for n = 16 and 24, each with
// k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4759
// n_div_8_subtile: one iteration for every (n, k, m) with n = 16 and 24,
// k = 1, 6, 11, 16 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4779
// small_kernel: m=4, n=8 with ks=3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4795
// small_kernel_subtile: ks=3, one iteration for every (k, n, m) with
// k = 1, 6, 11, 16, n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4816
// n_gt_8_small_kernel: m=4, ks=3 for every n in 9..15, each with
// k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4834
// n_div_8_small_kernel: m=4, ks=3 for n = 16 and 24, each with
// k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4852
// strided_cm_subtile: cm_stride=11, one iteration for every (k, n, m) with
// k = 1, 6, 11, 16, n in 1..8 and m in 1..4.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
4873
// a_offset: m=4, n=8 with ks=3 and a_offset=83, repeated for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3).a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
              xnn_init_f32_minmax_scalar_params);
  }
}
4890
// zero: m=4, n=8, ks=3, a_offset=83; for each k = 1, 6, 11, 16 the
// zero_index setting sweeps 0..3.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3).a_offset(83).zero_index(z_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
4910
// qmin: m=4, n=8, k=4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4924
// qmax: m=4, n=8, k=4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4938
// strided_cm: m=4, n=8, k=4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53,
            xnn_init_f32_minmax_scalar_params);
}
4952 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4953
4954
4955 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_8: single run with m=5, n=8, k=8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4968
// strided_cn: m=5, n=8, k=8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
4982
// k_eq_8_subtile: k=8, one iteration for every (n, m) with n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5000
// k_eq_8_subtile_m: k=8, n=8, one iteration for each m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5016
// k_eq_8_subtile_n: k=8, m=5, one iteration for each n in 1..8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5032
// k_eq_16: single run with m=5, n=8, k=16.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(16)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5045
// k_eq_16_subtile: k=16, one iteration for every (n, m) with n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5063
// k_lt_16: m=5, n=8 for every k in 1..15.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5078
// k_lt_16_subtile: one iteration for every (k, n, m) with k in 1..15,
// n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5098
// k_gt_16: m=5, n=8 for every k in 17..31.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5113
// k_gt_16_subtile: one iteration for every (k, n, m) with k in 17..31,
// n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5133
// k_div_8: m=5, n=8 for k = 24, 32, ..., 80 (step 8).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5148
// k_div_8_subtile: one iteration for every (k, n, m) with k = 24, 32, ..., 80,
// n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 24; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5168
// n_gt_8: m=5 for every n in 9..15, each with k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5185
// n_gt_8_strided_cn: m=5, cn_stride=11 for every n in 9..15, each with
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5203
// n_gt_8_subtile: one iteration for every (n, k, m) with n in 9..15,
// k = 1, 10, 19, 28, 37 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5223
// n_div_8: m=5 for n = 16 and 24, each with k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5240
// n_div_8_strided_cn: m=5, cn_stride=11 for n = 16 and 24, each with
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5258
// n_div_8_subtile: one iteration for every (n, k, m) with n = 16 and 24,
// k = 1, 10, 19, 28, 37 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5278
// small_kernel: m=5, n=8 with ks=3, for k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5294
// small_kernel_subtile: ks=3, one iteration for every (k, n, m) with
// k = 1, 10, 19, 28, 37, n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5315
// n_gt_8_small_kernel: m=5, ks=3 for every n in 9..15, each with
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5333
// n_div_8_small_kernel: m=5, ks=3 for n = 16 and 24, each with
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5351
// strided_cm_subtile: cm_stride=11, one iteration for every (k, n, m) with
// k = 1, 10, 19, 28, 37, n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11).iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5372
// a_offset: m=5, n=8 with ks=3 and a_offset=211, repeated for
// k = 1, 10, 19, 28, 37.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .ks(3).a_offset(211)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5389
// zero: m=5, n=8, ks=3, a_offset=211; for each k = 1, 10, 19, 28, 37 the
// zero_index setting sweeps 0..4.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx < 5; ++z_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(k_val)
          .ks(3).a_offset(211).zero_index(z_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5409
// qmin: m=5, n=8, k=8 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5423
// qmax: m=5, n=8, k=8 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5437
// strided_cm: m=5, n=8, k=8 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5451 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5452
5453
5454 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_8: single run with m=5, n=8, k=8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5467
// strided_cn: m=5, n=8, k=8 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
5481
// k_eq_8_subtile: k=8, one iteration for every (n, m) with n in 1..8 and m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
5499
// k_eq_8_subtile_m: k=8, n=8, one iteration for each m in 1..5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
5515
// Sweeps n over [1, 8] with m=5 and k=8; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5531
// Single run with m=5, n=8, k=16.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(16)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5544
// Sweeps n over [1, 8] and m over [1, 5] with k fixed at 16; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(16)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5562
// Sweeps k over [1, 15] with m=5 and n=8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5577
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 5]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5597
// Sweeps k over [17, 31] with m=5 and n=8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5612
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 5]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5632
// Sweeps k over 24, 32, ..., 80 (step 8) with m=5 and n=8.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 24; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5647
// Sweeps k over 24..80 (step 8), n over [1, 8] and m over [1, 5]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 24; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5667
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m=5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5684
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m=5 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5702
// Sweeps n over [9, 15], k over 1..40 (step 9) and m over [1, 5]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5722
// Sweeps n over 16 and 24 (step 8) and k over 1..40 (step 9) with m=5.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5739
// Sweeps n over 16 and 24 (step 8) and k over 1..40 (step 9) with m=5 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5757
// Sweeps n over 16 and 24, k over 1..40 (step 9) and m over [1, 5]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5777
// Sweeps k over 1..40 (step 9) with m=5, n=8 and ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5793
// Sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 5] with ks set to 3 and iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5814
// Sweeps n over [9, 15] and k over 1..40 (step 9) with m=5 and ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5832
// Sweeps n over 16 and 24 (step 8) and k over 1..40 (step 9) with m=5 and ks set to 3.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5850
// Sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 5] with cm_stride set to 11 and iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 5; ++cur_m) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
5871
// Sweeps k over 1..40 (step 9) with m=5, n=8, ks set to 3 and a_offset set to 211.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(cur_k)
      .ks(3)
      .a_offset(211)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
5888
// Sweeps k over 1..40 (step 9) and zero_index over [0, 4] with ks set to 3 and a_offset set to 211.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 5; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(cur_k)
        .ks(3)
        .a_offset(211)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5908
// Single run with m=5, n=8, k=8 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(8)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5922
// Single run with m=5, n=8, k=8 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(8)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5936
// Single run with m=5, n=8, k=8 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
5950 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5951
5952
5953 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=6, n=8, k=4.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
5966
// Single run with m=6, n=8, k=4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
5980
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 4; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
5998
// Sweeps m over [1, 6] with n=8 and k=4; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6014
// Sweeps n over [1, 8] with m=6 and k=4; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cur_n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6030
// Single run with m=6, n=8, k=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
6043
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 8; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6061
// Sweeps k over [1, 7] with m=6 and n=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6076
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6096
// Sweeps k over [9, 15] with m=6 and n=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6111
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6131
// Sweeps k over 12, 16, ..., 40 (step 4) with m=6 and n=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 12; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6146
// Sweeps k over 12..40 (step 4), n over [1, 8] and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 12; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6166
// Sweeps n over [9, 15] and k over 1..20 (step 5) with m=6.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6183
// Sweeps n over [9, 15] and k over 1..20 (step 5) with m=6 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6201
// Sweeps n over [9, 15], k over 1..20 (step 5) and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6221
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=6.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6238
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=6 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6256
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6276
// Sweeps k over 1..20 (step 5) with m=6, n=8 and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6292
// Sweeps k over 1..20 (step 5), n over [1, 8] and m over [1, 6] with ks set to 3 and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6313
// Sweeps n over [9, 15] and k over 1..20 (step 5) with m=6 and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6331
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) with m=6 and ks set to 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6349
// Sweeps k over 1..20 (step 5), n over [1, 8] and m over [1, 6] with cm_stride set to 11 and iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6370
// Sweeps k over 1..20 (step 5) with m=6, n=8, ks set to 3 and a_offset set to 127.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
  }
}
6387
// Sweeps k over 1..20 (step 5) and zero_index over [0, 5] with ks set to 3 and a_offset set to 127.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 6; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(cur_k)
        .ks(3)
        .a_offset(127)
        .zero_index(cur_mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6407
// Single run with m=6, n=8, k=4 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
6421
// Single run with m=6, n=8, k=4 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
6435
// Single run with m=6, n=8, k=4 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
}
6449 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6450
6451
6452 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=6, n=8, k=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
}
6465
// Single run with m=6, n=8, k=8 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
}
6479
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 8; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6497
// Sweeps m over [1, 6] with n=8 and k=8; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
  }
}
6513
// Sweeps n over [1, 8] with m=6 and k=8; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
  }
}
6529
// Single run with m=6, n=8, k=16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
}
6542
// Sweeps n over [1, 8] and m over [1, 6] with k fixed at 16; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(16)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
    }
  }
}
6560
// Sweeps k over [1, 15] with m=6 and n=8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
  }
}
6575
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 6]; every run sets iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6595
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k takes every value in 17..31 with m = 6 and n = 8.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
  }
}
6610
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k in 17..31, crossed with n in 1..8 and m in 1..6; one iteration per case.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6630
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k steps through the multiples of 8 from 24 to 80 with m = 6 and n = 8.
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
  }
}
6645
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Multiples of 8 from 24 to 80 for k, crossed with n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6665
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 (above nr = 8) with m = 6; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6682
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 with m = 6 and cn_stride = 11; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6700
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6720
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6737
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6 and cn_stride = 11; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6755
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6775
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // m = 6, n = 8 with ks = 3; k takes 1, 10, 19, 28, 37.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
  }
}
6791
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // ks = 3 with k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6812
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 with m = 6 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6830
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6848
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // cm_stride = 11 with k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
6869
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // m = 6, n = 8 with ks = 3 and a_offset = 251; k takes 1, 10, 19, 28, 37.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(251)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
              xnn_init_f32_minmax_scalar_params);
  }
}
6886
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Same setup as the a_offset case (ks = 3, a_offset = 251), additionally
  // passing each zero_index in 0..5 for every k in {1, 10, 19, 28, 37}.
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_dim)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6906
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
}
6920
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
}
6934
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 8 with cm_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
            xnn_init_f32_minmax_scalar_params);
}
6948 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6949
6950
6951 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8 (equal to mr/nr) with k = 2.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
6964
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 2 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
6978
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Every (n, m) pair with n in 1..8 and m in 1..6 at k = 2; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
6996
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Hold n = 8 and k = 2 while m walks 1..6; one iteration per case.
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7012
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Hold m = 6 and k = 2 while n walks 1..8; one iteration per case.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7028
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // The loop bounds admit only k = 1; m = 6 and n = 8.
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7043
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k = 1 only (loop bounds 1..<2), crossed with n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7063
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // The loop bounds admit only k = 3; m = 6 and n = 8.
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7078
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k = 3 only (loop bounds 3..<4), crossed with n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7098
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k steps through the even values from 4 to 20 with m = 6 and n = 8.
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7113
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Even k from 4 to 20, crossed with n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7133
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 (above nr = 8) with m = 6; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7150
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 with m = 6 and cn_stride = 11; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7168
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15, k in {1, 4, 7, 10}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7188
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7205
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6 and cn_stride = 11; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7223
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in {16, 24}, k in {1, 4, 7, 10}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7243
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // m = 6, n = 8 with ks = 3; k takes 1, 4, 7, 10.
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7259
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // ks = 3 with k in {1, 4, 7, 10}, n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7280
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 with m = 6 and ks = 3; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7298
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6 and ks = 3; k takes 1, 4, 7, 10.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7316
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // cm_stride = 11 with k in {1, 4, 7, 10}, n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7337
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // m = 6, n = 8 with ks = 3 and a_offset = 67; k takes 1, 4, 7, 10.
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
              xnn_init_f32_minmax_scalar_params);
  }
}
7354
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Same setup as the a_offset case (ks = 3, a_offset = 67), additionally
  // passing each zero_index in 0..5 for every k in {1, 4, 7, 10}.
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_dim)
          .ks(3)
          .a_offset(67)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7374
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 2 with qmin set to 128.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
7388
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 2 with qmax set to 128.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
7402
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 2 with cm_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
            xnn_init_f32_minmax_scalar_params);
}
7416 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7417
7418
7419 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8 (equal to mr/nr) with k = 4.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
}
7432
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Single case: m = 6, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
}
7446
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Every (n, m) pair with n in 1..8 and m in 1..6 at k = 4; one iteration each.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7464
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Hold n = 8 and k = 4 while m walks 1..6; one iteration per case.
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
7480
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Hold m = 6 and k = 4 while n walks 1..8; one iteration per case.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
7496
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k takes 1, 2, 3 with m = 6 and n = 8.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
7511
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k in 1..3, crossed with n in 1..8 and m in 1..6; one iteration per case.
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7531
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k takes 5, 6, 7 with m = 6 and n = 8.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
7546
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k in 5..7, crossed with n in 1..8 and m in 1..6; one iteration per case.
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7566
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // k steps through the multiples of 4 from 8 to 40 with m = 6 and n = 8.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
              xnn_init_f32_minmax_scalar_params);
  }
}
7581
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Multiples of 4 from 8 to 40 for k, crossed with n in 1..8 and m in 1..6;
  // one iteration per case.
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7601
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 (above nr = 8) with m = 6; k takes 1, 6, 11, 16.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7618
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15 with m = 6 and cn_stride = 11; k takes 1, 6, 11, 16.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7636
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in 9..15, k in {1, 6, 11, 16}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7656
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6; k takes 1, 6, 11, 16.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7673
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n takes 16 and 24 with m = 6 and cn_stride = 11; k takes 1, 6, 11, 16.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
7691
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // n in {16, 24}, k in {1, 6, 11, 16}, m in 1..6; one iteration per case.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7711
// Full 6x8 tile with ks = 3, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
7727
// With ks = 3, sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..6,
// running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7748
// With m = 6 and ks = 3, sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
7766
// With m = 6 and ks = 3, sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
7784
// With cm_stride = 11, sweeps k over {1, 6, 11, 16}, n over 1..8 and
// m over 1..6, running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
7805
// Full 6x8 tile with ks = 3 and a_offset = 127, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
        xnn_init_f32_minmax_scalar_params);
  }
}
7822
// Full 6x8 tile with ks = 3 and a_offset = 127; for k in {1, 6, 11, 16}
// each zero_index value in 0..5 is exercised.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
7842
// Full 6x8 tile with k = 4 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmin(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
7856
// Full 6x8 tile with k = 4 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .qmax(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
7870
// Full 6x8 tile with k = 4 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
      xnn_init_f32_minmax_scalar_params);
}
7884 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
7885
7886
7887 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 6x8 tile with k = 8.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
7900
// Full 6x8 tile with k = 8 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
7914
// With k = 8, sweeps n over 1..8 and m over 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
7932
// With n = 8 and k = 8, sweeps m over 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
7948
// With m = 6 and k = 8, sweeps n over 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
7964
// Full 6x8 tile with k = 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(16)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
7977
// With k = 16, sweeps n over 1..8 and m over 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
7995
// Full 6x8 tile for every k in 1..15.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
8010
// Sweeps k over 1..15, n over 1..8 and m over 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8030
// Full 6x8 tile for every k in 17..31.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
8045
// Sweeps k over 17..31, n over 1..8 and m over 1..6, one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8065
// Full 6x8 tile for k = 24, 32, ..., 80 (step 8).
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
8080
// Sweeps k over 24, 32, ..., 80, n over 1..8 and m over 1..6,
// running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8100
// With m = 6, sweeps n over 9..15 and k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8117
// With m = 6 and cn_stride = 11, sweeps n over 9..15 and
// k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8135
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..6,
// running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8155
// With m = 6, sweeps n over {16, 24} and k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8172
// With m = 6 and cn_stride = 11, sweeps n over {16, 24} and
// k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8190
// Sweeps n over {16, 24}, k over {1, 10, 19, 28, 37} and m over 1..6,
// running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8210
// Full 6x8 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
8226
// With ks = 3, sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and
// m over 1..6, running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8247
// With m = 6 and ks = 3, sweeps n over 9..15 and k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8265
// With m = 6 and ks = 3, sweeps n over {16, 24} and
// k over {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8283
// With cm_stride = 11, sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and
// m over 1..6, running a single iteration for each combination.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8304
// Full 6x8 tile with ks = 3 and a_offset = 251, for k in {1, 10, 19, 28, 37}.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
8321
// Full 6x8 tile with ks = 3 and a_offset = 251; for k in {1, 10, 19, 28, 37}
// each zero_index value in 0..5 is exercised.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(
          xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8341
// Full 6x8 tile with k = 8 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
8355
// Full 6x8 tile with k = 8 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
8369
// Full 6x8 tile with k = 8 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
8383 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8384
8385
8386 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x8 tile with k = 2.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .Test(
      xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
      xnn_init_f32_minmax_scalar_params);
}
8399
// Full 1x8 tile with k = 2 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cn_stride(11)
    .Test(
      xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
      xnn_init_f32_minmax_scalar_params);
}
8413
// With k = 2, sweeps n over 1..8; the m loop covers only m = 1.
// One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(2)
        .iterations(1)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8431
// With n = 8 and k = 2; the m loop covers only m = 1, one iteration.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(2)
      .iterations(1)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8447
// With m = 1 and k = 2, sweeps n over 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(2)
      .iterations(1)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8463
// Full 1x8 tile; the k loop covers only k = 1.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8478
// Sweeps n over 1..8; the k and m loops cover only k = 1 and m = 1.
// One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 2; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8498
// Full 1x8 tile; the k loop covers only k = 3.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8513
// Sweeps n over 1..8; the k and m loops cover only k = 3 and m = 1.
// One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 3; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8533
// Full 1x8 tile for k = 4, 6, ..., 20 (step 2).
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8548
// Sweeps k over 4, 6, ..., 20 and n over 1..8; the m loop covers only m = 1.
// One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 4; k_dim <= 20; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8568
// With m = 1, sweeps n over 9..15 and k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8585
// With m = 1 and cn_stride = 11, sweeps n over 9..15 and k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8603
// Sweeps n over 9..15 and k over {1, 4, 7, 10}; the m loop covers only m = 1.
// One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8623
// With m = 1, sweeps n over {16, 24} and k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8640
// With m = 1 and cn_stride = 11, sweeps n over {16, 24} and
// k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8658
// Sweeps n over {16, 24} and k over {1, 4, 7, 10}; the m loop covers only
// m = 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8678
// Full 1x8 tile with ks = 3, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8694
// With ks = 3, sweeps k over {1, 4, 7, 10} and n over 1..8; the m loop
// covers only m = 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8715
// With m = 1 and ks = 3, sweeps n over 9..15 and k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8733
// With m = 1 and ks = 3, sweeps n over {16, 24} and k over {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8751
// With cm_stride = 11, sweeps k over {1, 4, 7, 10} and n over 1..8; the
// m loop covers only m = 1. One iteration per combination.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8772
// Full 1x8 tile with ks = 3 and a_offset = 13, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
        xnn_init_f32_minmax_scalar_params);
  }
}
8789
// Full 1x8 tile with ks = 3 and a_offset = 13, for k in {1, 4, 7, 10};
// the zero_index loop covers only index 0.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 10; k_dim += 3) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(
          xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
8809
// Full 1x8 tile with k = 2 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmin(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
      xnn_init_f32_minmax_scalar_params);
}
8823
// Full 1x8 tile with k = 2 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmax(128)
    .Test(
      xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
      xnn_init_f32_minmax_scalar_params);
}
8837
// m = 1, n = 8, k = 2 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
8851 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8852
8853
8854 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 1, n = 8, k = 2.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
8867
// m = 1, n = 8, k = 2 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
8881
// k = 2; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
8899
// k = 2; n = 8; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(2)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8915
// k = 2; m = 1; n in [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(2)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8931
// m = 1, n = 8 for every k below 2 (i.e. k = 1).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8946
// k = 1; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
8966
// m = 1, n = 8 for k in [3, 4) (i.e. k = 3).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
8981
// k = 3; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9001
// m = 1, n = 8 for even k from 4 through 20.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9016
// Even k from 4 through 20; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9036
// n in [9, 15]; k = 1, 4, 7, 10; m = 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9053
// n in [9, 15]; k = 1, 4, 7, 10; m = 1; cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9071
// n in [9, 15]; k = 1, 4, 7, 10; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9091
// n = 16, 24; k = 1, 4, 7, 10; m = 1.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9108
// n = 16, 24; k = 1, 4, 7, 10; m = 1; cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9126
// n = 16, 24; k = 1, 4, 7, 10; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9146
// m = 1, n = 8 with ks(3), for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9162
// k = 1, 4, 7, 10; n in [1, 8]; m = 1; ks(3); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9183
// n in [9, 15]; k = 1, 4, 7, 10; m = 1; ks(3).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9201
// n = 16, 24; k = 1, 4, 7, 10; m = 1; ks(3).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9219
// k = 1, 4, 7, 10; n in [1, 8]; m = 1; cm_stride(11); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9240
// m = 1, n = 8 with ks(3) and a_offset(13), for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(13);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9257
// m = 1, n = 8 with ks(3), a_offset(13) and zero_index 0, for k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(13)
        .zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9277
// m = 1, n = 8, k = 2 with qmin(128).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9291
// m = 1, n = 8, k = 2 with qmax(128).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9305
// m = 1, n = 8, k = 2 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(2)
    .cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
9319 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9320
9321
9322 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 1, n = 8, k = 4 (sr = 4).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9335
// m = 1, n = 8, k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9349
// k = 4; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9367
// k = 4; n = 8; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9383
// k = 4; m = 1; n in [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(nc).k(4)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9399
// m = 1, n = 8 for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9414
// k = 1, 2, 3; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9434
// m = 1, n = 8 for k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9449
// k = 5, 6, 7; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9469
// m = 1, n = 8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9484
// k = 8, 12, ..., 40; n in [1, 8]; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9504
// n in [9, 15]; k = 1, 6, 11, 16; m = 1.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9521
// n in [9, 15]; k = 1, 6, 11, 16; m = 1; cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9539
// n in [9, 15]; k = 1, 6, 11, 16; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9559
// n = 16, 24; k = 1, 6, 11, 16; m = 1.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9576
// n = 16, 24; k = 1, 6, 11, 16; m = 1; cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9594
// n = 16, 24; k = 1, 6, 11, 16; m = 1; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9614
// m = 1, n = 8 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9630
// k = 1, 6, 11, 16; n in [1, 8]; m = 1; ks(3); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9651
// n in [9, 15]; k = 1, 6, 11, 16; m = 1; ks(3).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9669
// n = 16, 24; k = 1, 6, 11, 16; m = 1; ks(3).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9687
// k = 1, 6, 11, 16; n in [1, 8]; m = 1; cm_stride(11); one iteration per configuration.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9708
// m = 1, n = 8 with ks(3) and a_offset(23), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(23);
    tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
9725
// m = 1, n = 8 with ks(3), a_offset(23) and zero_index 0, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9745
// m = 1, n = 8, k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9759
// m = 1, n = 8, k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9773
// m = 1, n = 8, k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
9787 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9788
9789
9790 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 4, n = 2, k = 2.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9803
// m = 4, n = 2, k = 2 with cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .cn_stride(5);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
9817
// k = 2; n in [1, 2]; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(2)
        .iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9835
// k = 2; n = 2; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(2)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9851
// k = 2; m = 4; n in [1, 2]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(nc).k(2)
      .iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9867
// m = 4, n = 2 for every k below 2 (i.e. k = 1).
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9882
// k = 1; n in [1, 2]; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9902
// m = 4, n = 2 for k in [3, 4) (i.e. k = 3).
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9917
// k = 3; n in [1, 2]; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9937
// m = 4, n = 2 for even k from 4 through 20.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
9952
// Even k from 4 through 20; n in [1, 2]; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
9972
// n = 3; k = 1, 4, 7, 10; m = 4.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
9989
// n = 3; k = 1, 4, 7, 10; m = 4; cn_stride(5).
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10007
// n = 3; k = 1, 4, 7, 10; m in [1, 4]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10027
// n = 4 and 6 with m=4 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10044
// n = 4 and 6, m=4, k = 1, 4, 7, 10, with cn_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10062
// n = 4 and 6, k = 1, 4, 7, 10, every m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10082
// m=4, n=2, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10098
// ks=3 for k = 1, 4, 7, 10 and every n in [1,2], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10119
// n in [3,4) (i.e. n=3), m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10137
// n = 4 and 6, m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10155
// cm_stride=5 for k = 1, 4, 7, 10 and every n in [1,2], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10176
// m=4, n=2, k = 1, 4, 7, 10, with ks=3 and a_offset=43.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10193
// ks=3 and a_offset=43 for k = 1, 4, 7, 10, sweeping zero_index over 0..3.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10213
// m=4, n=2, k=2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10227
// m=4, n=2, k=2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10241
// m=4, n=2, k=2 with cm_stride set to 5.
TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(2)
    .cm_stride(5)
    .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10255 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10256
10257
10258 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m=4, n=4 (equal to mr and nr) with k=2.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10271
// m=4, n=4, k=2 with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .cn_stride(7)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10285
// k=2 for every n in [1,4] and m in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10303
// k=2, n=4, every m in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10319
// k=2, m=4, every n in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(cols).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10335
// k in [1,2) (i.e. k=1) with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 2; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10350
// k in [1,2) (i.e. k=1) for every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 2; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10370
// k in [3,4) (i.e. k=3) with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 3; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10385
// k in [3,4) (i.e. k=3) for every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 3; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10405
// k = 4, 6, ..., 20 with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10420
// k = 4, 6, ..., 20 crossed with every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10440
// n = 5, 6, 7 with m=4 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10457
// n = 5, 6, 7, m=4, k = 1, 4, 7, 10, with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10475
// n = 5, 6, 7, k = 1, 4, 7, 10, every m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10495
// n = 8 and 12 with m=4 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10512
// n = 8 and 12, m=4, k = 1, 4, 7, 10, with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10530
// n = 8 and 12, k = 1, 4, 7, 10, every m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10550
// m=4, n=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10566
// ks=3 for k = 1, 4, 7, 10 and every n in [1,4], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10587
// n = 5, 6, 7, m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10605
// n = 8 and 12, m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10623
// cm_stride=7 for k = 1, 4, 7, 10 and every n in [1,4], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10644
// m=4, n=4, k = 1, 4, 7, 10, with ks=3 and a_offset=43.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10661
// ks=3 and a_offset=43 for k = 1, 4, 7, 10, sweeping zero_index over 0..3.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10681
// m=4, n=4, k=2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10695
// m=4, n=4, k=2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10709
// m=4, n=4, k=2 with cm_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .cm_stride(7)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10723 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10724
10725
10726 #if XNN_ARCH_ARM64
// m=4, n=4 (equal to mr and nr) with k=2.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10739
// m=4, n=4, k=2 with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .cn_stride(7)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
10753
// k=2 for every n in [1,4] and m in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10771
// k=2, n=4, every m in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10787
// k=2, m=4, every n in [1,4]; a single iteration per shape.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(cols).k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10803
// k in [1,2) (i.e. k=1) with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 2; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10818
// k in [1,2) (i.e. k=1) for every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth < 2; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10838
// k in [3,4) (i.e. k=3) with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 3; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10853
// k in [3,4) (i.e. k=3) for every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 3; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10873
// k = 4, 6, ..., 20 with m=4, n=4.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
10888
// k = 4, 6, ..., 20 crossed with every n in [1,4] and m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 4; depth <= 20; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10908
// n = 5, 6, 7 with m=4 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10925
// n = 5, 6, 7, m=4, k = 1, 4, 7, 10, with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10943
// n = 5, 6, 7, k = 1, 4, 7, 10, every m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
10963
// n = 8 and 12 with m=4 and k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10980
// n = 8 and 12, m=4, k = 1, 4, 7, 10, with cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
10998
// n = 8 and 12, k = 1, 4, 7, 10, every m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11018
// m=4, n=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11034
// ks=3 for k = 1, 4, 7, 10 and every n in [1,4], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11055
// n = 5, 6, 7, m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11073
// n = 8 and 12, m=4, k = 1, 4, 7, 10, with ks set to 3.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 10; depth += 3) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11091
// cm_stride=7 for k = 1, 4, 7, 10 and every n in [1,4], m in [1,4];
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11112
// m=4, n=4, k = 1, 4, 7, 10, with ks=3 and a_offset=43.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11129
// ks=3 and a_offset=43 for k = 1, 4, 7, 10, sweeping zero_index over 0..3.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t depth = 1; depth <= 10; depth += 3) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11149
// m=4, n=4, k=2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11163
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Full 4x4 tile at k = 2 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(2);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11177
TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Full 4x4 tile at k = 2 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(2);
  tester.cm_stride(7);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
11191 #endif // XNN_ARCH_ARM64
11192
11193
11194 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11207
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11221
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every m in [1, 6] and n in [1, 8] at k = 2, one iteration per shape.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(2);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11239
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only m over [1, 6]; n stays at 8 and k at 2.
  for (uint32_t m = 1; m <= 6; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11255
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only n over [1, 8]; m stays at 6 and k at 2.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11271
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  // The bound k < 2 leaves k = 1 as the only value exercised.
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11286
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1 only (k < 2), crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11306
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  // The range [3, 4) leaves k = 3 as the only value exercised.
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11321
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 3 only (range [3, 4)), crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11341
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for even k = 4, 6, ..., 20.
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11356
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Even k = 4, 6, ..., 20 crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11376
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6, for k in {1, 4, 7, 10}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11393
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6 and cn_stride = 11, for k in {1, 4, 7, 10}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11411
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15], k in {1, 4, 7, 10} and every m in [1, 6]; one iteration each.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11431
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6, for k in {1, 4, 7, 10}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11448
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6 and cn_stride = 11, for k in {1, 4, 7, 10}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11466
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in {1, 4, 7, 10} and every m in [1, 6]; one iteration each.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11486
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile with ks = 3, for k in {1, 4, 7, 10}.
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11502
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with every m in [1, 6] and n in [1, 8], for k in {1, 4, 7, 10}.
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11523
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6 and ks = 3, for k in {1, 4, 7, 10}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11541
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6 and ks = 3, for k in {1, 4, 7, 10}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11559
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 11 with every m in [1, 6] and n in [1, 8], for k in {1, 4, 7, 10}.
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11580
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for k in {1, 4, 7, 10}, with ks = 3 and a_offset = 67.
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.ks(3).a_offset(67);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
11597
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Same setup as the a_offset case, additionally stepping zero_index over
  // each value in [0, 5] for every k in {1, 4, 7, 10}.
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k);
      tester.ks(3).a_offset(67).zero_index(mz);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11617
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11631
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11645
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
11659 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11660
11661
11662 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 4.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
11675
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 4 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
11689
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every m in [1, 6] and n in [1, 8] at k = 4, one iteration per shape.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11707
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only m over [1, 6]; n stays at 8 and k at 4.
  for (uint32_t m = 1; m <= 6; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11723
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only n over [1, 8]; m stays at 6 and k at 4.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11739
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for k = 1, 2, 3.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11754
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1, 2, 3 crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11774
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for k = 5, 6, 7.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11789
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 5, 6, 7 crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11809
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for k = 8, 12, ..., 40.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11824
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, 12, ..., 40 crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11844
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6, for k in {1, 6, 11, 16}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11861
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6 and cn_stride = 11, for k in {1, 6, 11, 16}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11879
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15], k in {1, 6, 11, 16} and every m in [1, 6]; one iteration each.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11899
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6, for k in {1, 6, 11, 16}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11916
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6 and cn_stride = 11, for k in {1, 6, 11, 16}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
11934
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in {1, 6, 11, 16} and every m in [1, 6]; one iteration each.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11954
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile with ks = 3, for k in {1, 6, 11, 16}.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
11970
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with every m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
11991
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 6 and ks = 3, for k in {1, 6, 11, 16}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12009
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} with m = 6 and ks = 3, for k in {1, 6, 11, 16}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12027
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 11 with every m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12048
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for k in {1, 6, 11, 16}, with ks = 3 and a_offset = 127.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
12065
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Same setup as the a_offset case, additionally stepping zero_index over
  // each value in [0, 5] for every k in {1, 6, 11, 16}.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12085
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 4 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
12099
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 4 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
12113
TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
}
12127 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12128
12129
12130 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
12143
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile at k = 2 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(2);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
12157
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every m in [1, 6] and n in [1, 8] at k = 2, one iteration per shape.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m).n(n).k(2);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12175
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only m over [1, 6]; n stays at 8 and k at 2.
  for (uint32_t m = 1; m <= 6; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m).n(8).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12191
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // Varies only n over [1, 8]; m stays at 6 and k at 2.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n).k(2);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12207
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  // The bound k < 2 leaves k = 1 as the only value exercised.
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12222
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1 only (k < 2), crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12242
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  // The range [3, 4) leaves k = 3 as the only value exercised.
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12257
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 3 only (range [3, 4)), crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12277
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  // Full 6x8 tile for even k = 4, 6, ..., 20.
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12292
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Even k = 4, 6, ..., 20 crossed with every m in [1, 6] and n in [1, 8].
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12312
// m = 6 with n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12329
// m = 6 with n in [9, 15] and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12347
// n in [9, 15], k in {1, 4, 7, 10}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12367
// m = 6 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12384
// m = 6 with n in {16, 24} and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12402
// n in {16, 24}, k in {1, 4, 7, 10}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12422
// m = 6, n = 8 with ks set to 3, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12438
// ks = 3 with k in {1, 4, 7, 10}, n in [1, 8], m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12459
// m = 6, ks = 3 with n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12477
// m = 6, ks = 3 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12495
// cm_stride = 11 with k in {1, 4, 7, 10}, n in [1, 8], m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12516
// m = 6, n = 8, ks = 3 with a_offset set to 67, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12533
// m = 6, n = 8, ks = 3, a_offset = 67; sweeps zero_index over [0, 5] for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(67)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12553
// m = 6, n = 8, k = 2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
12567
// m = 6, n = 8, k = 2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
12581
// m = 6, n = 8, k = 2 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
}
12595 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12596
12597
12598 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// m = 6, n = 8 with k = 2.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
12611
// m = 6, n = 8, k = 2 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
12625
// k = 2 across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(2)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12643
// n = 8, k = 2 with m swept over [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12659
// m = 6, k = 2 with n swept over [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12675
// m = 6, n = 8 with k = 1, the only value in the [1, 2) range.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12690
// k = 1 across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 2; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12710
// m = 6, n = 8 with k = 3, the only value in the [3, 4) range.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12725
// k = 3 across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 3; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12745
// m = 6, n = 8 for every even k from 4 through 20.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12760
// Even k from 4 through 20 across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 4; kc <= 20; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12780
// m = 6 with n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12797
// m = 6 with n in [9, 15] and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12815
// n in [9, 15], k in {1, 4, 7, 10}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12835
// m = 6 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12852
// m = 6 with n in {16, 24} and k in {1, 4, 7, 10}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12870
// n in {16, 24}, k in {1, 4, 7, 10}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12890
// m = 6, n = 8 with ks set to 3, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
12906
// ks = 3 with k in {1, 4, 7, 10}, n in [1, 8], m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12927
// m = 6, ks = 3 with n in [9, 15] and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12945
// m = 6, ks = 3 with n in {16, 24} and k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 10; kc += 3) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
12963
// cm_stride = 11 with k in {1, 4, 7, 10}, n in [1, 8], m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
12984
// m = 6, n = 8, ks = 3 with a_offset set to 67, for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
13001
// m = 6, n = 8, ks = 3, a_offset = 67; sweeps zero_index over [0, 5] for k in {1, 4, 7, 10}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 10; kc += 3) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(67)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13021
// m = 6, n = 8, k = 2 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
13035
// m = 6, n = 8, k = 2 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
13049
// m = 6, n = 8, k = 2 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
}
13063 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13064
13065
13066 #if XNN_ARCH_ARM64
// m = 6, n = 8 with k = 4.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
13079
// m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
13093
// k = 4 across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13111
// n = 8, k = 4 with m swept over [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13127
// m = 6, k = 4 with n swept over [1, 8]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13143
// m = 6, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13158
// k in [1, 3] across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13178
// m = 6, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13193
// k in [5, 7] across every n in [1, 8] and m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13213
// m = 6, n = 8 for every multiple of 4 from k = 8 through k = 40.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13228
// Multiples of 4 from k = 8 through k = 40 across every n in [1, 8] and m in [1, 6]; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13248
// m = 6 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13265
// m = 6 with n in [9, 15] and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13283
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13303
// m = 6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13320
// m = 6 with n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13338
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13358
// m = 6, n = 8 with ks set to 3, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13374
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13395
// m = 6, ks = 3 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13413
// m = 6, ks = 3 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13431
// Sweeps m in [1, 6] and n in [1, 8] for k in {1, 6, 11, 16} with
// cm_stride = 11; iterations is set to 1 for every combination.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13452
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
  }
}
13469
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16},
// passing every mz in [0, 5] as zero_index.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13489
// Single run at m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
13503
// Single run at m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
13517
// Single run at m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
}
13531 #endif // XNN_ARCH_ARM64
13532
13533
13534 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
13547
// Single run at m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
13561
// Sweeps m in [1, 6] and n in [1, 8] at k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(m).n(n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13579
// Sweeps m in [1, 6] at n = 8, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 6; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13595
// Sweeps n in [1, 8] at m = 6, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13611
// Runs m = 6, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13626
// Sweeps m in [1, 6] and n in [1, 8] for k in [1, 3]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13646
// Runs m = 6, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13661
// Sweeps m in [1, 6] and n in [1, 8] for k in [5, 7]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13681
// Runs m = 6, n = 8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13696
// Sweeps m in [1, 6] and n in [1, 8] for k = 8, 12, ..., 40; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13716
// Runs m = 6 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13733
// Runs m = 6 for n in [9, 15] and k in {1, 6, 11, 16} with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13751
// Sweeps m in [1, 6] for n in [9, 15] and k in {1, 6, 11, 16}; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13771
// Runs m = 6 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13788
// Runs m = 6 for n in {16, 24} and k in {1, 6, 11, 16} with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13806
// Sweeps m in [1, 6] for n in {16, 24} and k in {1, 6, 11, 16}; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13826
// Runs m = 6, n = 8 with ks = 3 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13842
// Sweeps m in [1, 6] and n in [1, 8] with ks = 3 for k in {1, 6, 11, 16};
// iterations is set to 1 for every combination.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13863
// Runs m = 6 with ks = 3 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13881
// Runs m = 6 with ks = 3 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13899
// Sweeps m in [1, 6] and n in [1, 8] for k in {1, 6, 11, 16} with
// cm_stride = 11; iterations is set to 1 for every combination.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
13920
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
13937
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16},
// passing every mz in [0, 5] as zero_index.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(8).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
13957
// Single run at m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
13971
// Single run at m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
13985
// Single run at m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
13999 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14000
14001
14002 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 6, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14015
// Single run at m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14029
// Sweeps m in [1, 6] and n in [1, 8] at k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(m).n(n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14047
// Sweeps m in [1, 6] at n = 8, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 6; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14063
// Sweeps n in [1, 8] at m = 6, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14079
// Runs m = 6, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14094
// Sweeps m in [1, 6] and n in [1, 8] for k in [1, 3]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14114
// Runs m = 6, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14129
// Sweeps m in [1, 6] and n in [1, 8] for k in [5, 7]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14149
// Runs m = 6, n = 8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14164
// Sweeps m in [1, 6] and n in [1, 8] for k = 8, 12, ..., 40; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14184
// Runs m = 6 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14201
// Runs m = 6 for n in [9, 15] and k in {1, 6, 11, 16} with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14219
// Sweeps m in [1, 6] for n in [9, 15] and k in {1, 6, 11, 16}; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14239
// Runs m = 6 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14256
// Runs m = 6 for n in {16, 24} and k in {1, 6, 11, 16} with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14274
// Sweeps m in [1, 6] for n in {16, 24} and k in {1, 6, 11, 16}; iterations is
// set to 1.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14294
// Runs m = 6, n = 8 with ks = 3 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14310
// Sweeps m in [1, 6] and n in [1, 8] with ks = 3 for k in {1, 6, 11, 16};
// iterations is set to 1 for every combination.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14331
// Runs m = 6 with ks = 3 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14349
// Runs m = 6 with ks = 3 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14367
// Sweeps m in [1, 6] and n in [1, 8] for k in {1, 6, 11, 16} with
// cm_stride = 11; iterations is set to 1 for every combination.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14388
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14405
// Runs m = 6, n = 8 with ks = 3 and a_offset = 127 for k in {1, 6, 11, 16},
// passing every mz in [0, 5] as zero_index.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(8).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14425
// Single run at m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14439
// Single run at m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14453
// Single run at m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14467 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14468
14469
14470 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 8, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(8).kr(1).sr(4);
  tester.m(8).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
14483
// Single run at m = 8, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(8).nr(8).kr(1).sr(4);
  tester.m(8).n(8).k(4);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
14497
// Sweeps m in [1, 8] and n in [1, 8] at k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 8; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(8).nr(8).kr(1).sr(4);
      tester.m(m).n(n).k(4);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14515
// Sweeps m in [1, 8] at n = 8, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 8; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(8).kr(1).sr(4);
    tester.m(m).n(8).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14531
// Sweeps n in [1, 8] at m = 8, k = 4; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(8).kr(1).sr(4);
    tester.m(8).n(n).k(4);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14547
// Runs m = 8, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(8).kr(1).sr(4);
    tester.m(8).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14562
// Sweeps m in [1, 8] and n in [1, 8] for k in [1, 3]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 8; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14582
// Runs m = 8, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(8).nr(8).kr(1).sr(4);
    tester.m(8).n(8).k(k);
    tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14597
// Sweeps m in [1, 8] and n in [1, 8] for k in [5, 7]; iterations is set to 1.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 8; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(8).nr(8).kr(1).sr(4);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14617
// 8x8s4 NEON kernel: m=8, n=8, with k stepping by 4 from 8 through 40.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14632
// 8x8s4 NEON kernel: k stepping by 4 from 8 through 40, crossed with n in 1..8
// and m in 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14652
// 8x8s4 NEON kernel: m=8, with n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14669
// 8x8s4 NEON kernel: m=8 and cn_stride(11), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14687
// 8x8s4 NEON kernel: n in 9..15, k in {1, 6, 11, 16}, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14707
// 8x8s4 NEON kernel: m=8, with n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14724
// 8x8s4 NEON kernel: m=8 and cn_stride(11), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14742
// 8x8s4 NEON kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14762
// 8x8s4 NEON kernel: m=8, n=8, ks(3), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14778
// 8x8s4 NEON kernel: ks(3) with k in {1, 6, 11, 16}, n in 1..8, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14799
// 8x8s4 NEON kernel: m=8 and ks(3), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14817
// 8x8s4 NEON kernel: m=8 and ks(3), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14835
// 8x8s4 NEON kernel: cm_stride(11) with k in {1, 6, 11, 16}, n in 1..8,
// m in 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
14856
// 8x8s4 NEON kernel: m=8, n=8, ks(3), a_offset(163), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .ks(3).a_offset(163)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
  }
}
14873
// 8x8s4 NEON kernel: m=8, n=8, ks(3), a_offset(163), with k in {1, 6, 11, 16}
// crossed with zero_index values 0..7.
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t mz_val = 0; mz_val < 8; ++mz_val) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(8).k(k_val)
        .ks(3).a_offset(163).zero_index(mz_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14893
// 8x8s4 NEON kernel: single m=8, n=8, k=4 configuration with qmin(128).
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
14907
// 8x8s4 NEON kernel: single m=8, n=8, k=4 configuration with qmax(128).
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
14921
// 8x8s4 NEON kernel: single m=8, n=8, k=4 configuration with cm_stride(11).
TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
}
14935 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14936
14937
14938 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8s4 NEON-FMA kernel: single m=8, n=8, k=4 configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14951
// 8x8s4 NEON-FMA kernel: single m=8, n=8, k=4 configuration with cn_stride(11).
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
14965
// 8x8s4 NEON-FMA kernel: k=4 with n in 1..8 crossed with m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
14983
// 8x8s4 NEON-FMA kernel: n=8, k=4, with m swept over 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(m_val).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
14999
// 8x8s4 NEON-FMA kernel: m=8, k=4, with n swept over 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(n_val).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15015
// 8x8s4 NEON-FMA kernel: m=8, n=8, with k swept over 1..3.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15030
// 8x8s4 NEON-FMA kernel: every (k, n, m) with k in 1..3, n in 1..8, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15050
// 8x8s4 NEON-FMA kernel: m=8, n=8, with k swept over 5..7.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15065
// 8x8s4 NEON-FMA kernel: every (k, n, m) with k in 5..7, n in 1..8, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15085
// 8x8s4 NEON-FMA kernel: m=8, n=8, with k stepping by 4 from 8 through 40.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15100
// 8x8s4 NEON-FMA kernel: k stepping by 4 from 8 through 40, crossed with
// n in 1..8 and m in 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15120
// 8x8s4 NEON-FMA kernel: m=8, with n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15137
// 8x8s4 NEON-FMA kernel: m=8 and cn_stride(11), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15155
// 8x8s4 NEON-FMA kernel: n in 9..15, k in {1, 6, 11, 16}, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15175
// 8x8s4 NEON-FMA kernel: m=8, with n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15192
// 8x8s4 NEON-FMA kernel: m=8 and cn_stride(11), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15210
// 8x8s4 NEON-FMA kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15230
// 8x8s4 NEON-FMA kernel: m=8, n=8, ks(3), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15246
// 8x8s4 NEON-FMA kernel: ks(3) with k in {1, 6, 11, 16}, n in 1..8, m in 1..8;
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15267
// 8x8s4 NEON-FMA kernel: m=8 and ks(3), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15285
// 8x8s4 NEON-FMA kernel: m=8 and ks(3), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15303
// 8x8s4 NEON-FMA kernel: cm_stride(11) with k in {1, 6, 11, 16}, n in 1..8,
// m in 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 8; ++m_val) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
15324
// 8x8s4 NEON-FMA kernel: m=8, n=8, ks(3), a_offset(163), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(4)
      .m(8).n(8).k(k_val)
      .ks(3).a_offset(163)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}
15341
// 8x8s4 NEON-FMA kernel: m=8, n=8, ks(3), a_offset(163), with k in {1, 6, 11, 16}
// crossed with zero_index values 0..7.
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t mz_val = 0; mz_val < 8; ++mz_val) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(4)
        .m(8).n(8).k(k_val)
        .ks(3).a_offset(163).zero_index(mz_val)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}
15361
// 8x8s4 NEON-FMA kernel: single m=8, n=8, k=4 configuration with qmin(128).
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
15375
// 8x8s4 NEON-FMA kernel: single m=8, n=8, k=4 configuration with qmax(128).
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
15389
// 8x8s4 NEON-FMA kernel: single m=8, n=8, k=4 configuration with cm_stride(11).
TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(4)
    .m(8).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
}
15403 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15404
15405
15406 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8 SSE2-dup kernel: single m=1, n=8, k=4 configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15419
// 1x8 SSE2-dup kernel: single m=1, n=8, k=4 configuration with cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15433
// 1x8 SSE2-dup kernel: k=4 with n in 1..8 crossed with m in 1..1 (single pass);
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15451
// 1x8 SSE2-dup kernel: n=8, k=4, with m in 1..1 (single pass); iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15467
// 1x8 SSE2-dup kernel: m=1, k=4, with n swept over 1..8; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_val).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15483
// 1x8 SSE2-dup kernel: m=1, n=8, with k swept over 1..3.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15498
// 1x8 SSE2-dup kernel: k in 1..3, n in 1..8, m in 1..1 (single pass);
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15518
// 1x8 SSE2-dup kernel: m=1, n=8, with k swept over 5..7.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15533
// 1x8 SSE2-dup kernel: k in 5..7, n in 1..8, m in 1..1 (single pass);
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15553
// 1x8 SSE2-dup kernel: m=1, n=8, with k stepping by 4 from 8 through 40.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15568
// 1x8 SSE2-dup kernel: k stepping by 4 from 8 through 40, crossed with
// n in 1..8 and m in 1..1 (single pass); iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15588
// 1x8 SSE2-dup kernel: m=1, with n in 9..15 crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15605
// 1x8 SSE2-dup kernel: m=1 and cn_stride(11), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15623
// 1x8 SSE2-dup kernel: n in 9..15, k in {1, 6, 11, 16}, m in 1..1 (single pass);
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15643
// 1x8 SSE2-dup kernel: m=1, with n in {16, 24} crossed with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15660
// 1x8 SSE2-dup kernel: m=1 and cn_stride(11), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15678
// 1x8 SSE2-dup kernel: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..1 (single pass);
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15698
// 1x8 SSE2-dup kernel: m=1, n=8, ks(3), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15714
// 1x8 SSE2-dup kernel: ks(3) with k in {1, 6, 11, 16}, n in 1..8,
// m in 1..1 (single pass); iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15735
// 1x8 SSE2-dup kernel: m=1 and ks(3), with n in 9..15 crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15753
// 1x8 SSE2-dup kernel: m=1 and ks(3), with n in {16, 24} crossed with
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15771
// 1x8 SSE2-dup kernel: cm_stride(11) with k in {1, 6, 11, 16}, n in 1..8,
// m in 1..1 (single pass); iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11).iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15792
// 1x8 SSE2-dup kernel: m=1, n=8, ks(3), a_offset(23), with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3).a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
15809
// Full 1x8 tile with ks(3), a_offset(23) and zero_index(0) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(depth)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15829
// Full 1x8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15843
// Full 1x8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15857
// Full 1x8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
15871 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15872
15873
15874 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
15887
// Full 3x8 tile at k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
15901
// Sweeps n = 1..8 and m = 1..3 at k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
15919
// Sweeps m = 1..3 at n = 8 and k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
15935
// Sweeps n = 1..8 at m = 3 and k = 4, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
15951
// Full 3x8 tile for k = 1..3.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
15966
// Sweeps k = 1..3, n = 1..8 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
15986
// Full 3x8 tile for k = 5..7.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16001
// Sweeps k = 5..7, n = 1..8 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16021
// Full 3x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16036
// Sweeps k = 8, 12, ..., 40, n = 1..8 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16056
// Sweeps n = 9..15 and k = 1, 6, 11, 16 at m = 3.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16073
// Sweeps n = 9..15 and k = 1, 6, 11, 16 at m = 3 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16091
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16111
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 at m = 3.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16128
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 at m = 3 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16146
// Sweeps n = 16, 24, k = 1, 6, 11, 16 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16166
// Full 3x8 tile with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16182
// Sweeps k = 1, 6, 11, 16, n = 1..8 and m = 1..3 with ks(3), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16203
// Sweeps n = 9..15 and k = 1, 6, 11, 16 at m = 3 with ks(3).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16221
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 at m = 3 with ks(3).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16239
// Sweeps k = 1, 6, 11, 16, n = 1..8 and m = 1..3 with cm_stride(11), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16260
// Full 3x8 tile with ks(3) and a_offset(67) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
16277
// Full 3x8 tile with ks(3) and a_offset(67), cycling zero_index over 0..2 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(depth)
          .ks(3)
          .a_offset(67)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
16297
// Full 3x8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
16311
// Full 3x8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
16325
// Full 3x8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
16339 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16340
16341
16342 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
16355
// Full 3x8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
16369
// Sweeps n = 1..8 and m = 1..3 at k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16387
// Sweeps m = 1..3 at n = 8 and k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
16403
// Sweeps n = 1..8 at m = 3 and k = 1, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
16419
// Full 3x8 tile for k = 2..9.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
16434
// Sweeps k = 2..9, n = 1..8 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16454
// Sweeps n = 9..15 and k = 1, 3, 5 at m = 3.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16471
// Sweeps n = 9..15 and k = 1, 3, 5 at m = 3 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16489
// Sweeps n = 9..15, k = 1, 3, 5 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16509
// Sweeps n = 16, 24 and k = 1, 3, 5 at m = 3.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16526
// Sweeps n = 16, 24 and k = 1, 3, 5 at m = 3 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16544
// Sweeps n = 16, 24, k = 1, 3, 5 and m = 1..3, one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16564
// Full 3x8 tile with ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
16580
// Sweeps k = 1, 3, 5, n = 1..8 and m = 1..3 with ks(3), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16601
// Sweeps n = 9..15 and k = 1, 3, 5 at m = 3 with ks(3).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16619
// Sweeps n = 16, 24 and k = 1, 3, 5 at m = 3 with ks(3).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16637
// Sweeps k = 1, 3, 5, n = 1..8 and m = 1..3 with cm_stride(11), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16658
// Full 3x8 tile with ks(3) and a_offset(17) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .a_offset(17)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
16675
// Full 3x8 tile with ks(3) and a_offset(17), cycling zero_index over 0..2 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(depth)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_row)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
16695
// Full 3x8 tile at k = 1 with qmin(128).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
16709
// Full 3x8 tile at k = 1 with qmax(128).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
16723
// Full 3x8 tile at k = 1 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
16737 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16738
16739
16740 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x8 tile (sr = 4) at k = 4.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
16753
// Full 3x8 tile (sr = 4) at k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
16767
// Sweeps n = 1..8 and m = 1..3 at k = 4 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
16785
// Sweeps m = 1..3 at n = 8 and k = 4 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
16801
// Sweeps n = 1..8 at m = 3 and k = 4 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
16817
// Full 3x8 tile (sr = 4) for k = 1..3.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
16832
// Sweeps k = 1..3, n = 1..8 and m = 1..3 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16852
// Full 3x8 tile (sr = 4) for k = 5..7.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
16867
// Sweeps k = 5..7, n = 1..8 and m = 1..3 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16887
// Full 3x8 tile (sr = 4) for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
16902
// Sweeps k = 8, 12, ..., 40, n = 1..8 and m = 1..3 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16922
// Sweeps n = 9..15 and k = 1, 6, 11, 16 at m = 3 (sr = 4).
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
16939
// Sweeps n = 9..15 and k = 1, 6, 11, 16 at m = 3 (sr = 4) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
16957
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..3 (sr = 4), one iteration per case.
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
16977
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 at m = 3 (sr = 4).
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
16994
// Sweeps n = 16, 24 and k = 1, 6, 11, 16 at m = 3 (sr = 4) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
17012
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 6, 11, 16, and m sweeps 1..3; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17032
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, small_kernel) {
  // Full 3x8 tile with ks(3); k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
17048
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, small_kernel_subtile) {
  // ks(3) with k in 1, 6, 11, 16, n sweeping 1..8 and m sweeping 1..3; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17069
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_small_kernel) {
  // Full m=3 tile with ks(3); n sweeps 9..15 and k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
17087
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_small_kernel) {
  // Full m=3 tile with ks(3); n takes 16 and 24, k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
17105
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cm_subtile) {
  // cm_stride(11) with k in 1, 6, 11, 16, n sweeping 1..8 and m sweeping 1..3; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17126
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, a_offset) {
  // Full 3x8 tile with ks(3) and a_offset(67); k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k_size)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
  }
}
17143
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, zero) {
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..2.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_size)
        .ks(3)
        .a_offset(67)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
    }
  }
}
17163
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, qmin) {
  // Single full 3x8 tile at k=4 with qmin(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
17177
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, qmax) {
  // Single full 3x8 tile at k=4 with qmax(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
17191
TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cm) {
  // Single full 3x8 tile at k=4 with cm_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
}
17205 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17206
17207
17208 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1) {
  // Single full 4x8 tile at k=1.
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
17221
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cn) {
  // Single full 4x8 tile at k=1 with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
17235
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile) {
  // k=1 with n sweeping 1..8 and m sweeping 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17253
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_m) {
  // k=1, n=8, with m sweeping 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
17269
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_n) {
  // k=1, m=4, with n sweeping 1..8; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
17285
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1) {
  // Full 4x8 tile with k sweeping 2..9.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
17300
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1_subtile) {
  // k sweeps 2..9, n sweeps 1..8 and m sweeps 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17320
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8) {
  // Full m=4 tile; n sweeps 9..15 and k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17337
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_cn) {
  // Full m=4 tile; n sweeps 9..15 and k takes 1, 3, 5, with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17355
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_subtile) {
  // n sweeps 9..15, k takes 1, 3, 5, and m sweeps 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17375
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8) {
  // Full m=4 tile; n takes 16 and 24, k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17392
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_cn) {
  // Full m=4 tile; n takes 16 and 24, k takes 1, 3, 5, with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17410
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 3, 5, and m sweeps 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17430
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, small_kernel) {
  // Full 4x8 tile with ks(3); k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
17446
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, small_kernel_subtile) {
  // ks(3) with k in 1, 3, 5, n sweeping 1..8 and m sweeping 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17467
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_small_kernel) {
  // Full m=4 tile with ks(3); n sweeps 9..15 and k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17485
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_small_kernel) {
  // Full m=4 tile with ks(3); n takes 16 and 24, k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17503
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cm_subtile) {
  // cm_stride(11) with k in 1, 3, 5, n sweeping 1..8 and m sweeping 1..4; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17524
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, a_offset) {
  // Full 4x8 tile with ks(3) and a_offset(23); k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
17541
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, zero) {
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..3.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
17561
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, qmin) {
  // Single full 4x8 tile at k=1 with qmin(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
17575
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, qmax) {
  // Single full 4x8 tile at k=1 with qmax(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
17589
TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cm) {
  // Single full 4x8 tile at k=1 with cm_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
17603 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17604
17605
17606 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4) {
  // Single full 5x8 tile at k=4.
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
17619
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cn) {
  // Single full 5x8 tile at k=4 with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
17633
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile) {
  // k=4 with n sweeping 1..8 and m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17651
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_m) {
  // k=4, n=8, with m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17667
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_n) {
  // k=4, m=5, with n sweeping 1..8; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17683
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_lt_4) {
  // Full 5x8 tile with k sweeping 1..3.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17698
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_lt_4_subtile) {
  // k sweeps 1..3, n sweeps 1..8 and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17718
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_gt_4) {
  // Full 5x8 tile with k sweeping 5..7.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17733
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_gt_4_subtile) {
  // k sweeps 5..7, n sweeps 1..8 and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17753
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_div_4) {
  // Full 5x8 tile with k stepping by 4 from 8 through 40.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17768
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_div_4_subtile) {
  // k steps by 4 from 8 through 40, n sweeps 1..8 and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17788
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8) {
  // Full m=5 tile; n sweeps 9..15 and k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17805
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_strided_cn) {
  // Full m=5 tile; n sweeps 9..15 and k takes 1, 6, 11, 16, with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17823
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_subtile) {
  // n sweeps 9..15, k takes 1, 6, 11, 16, and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17843
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8) {
  // Full m=5 tile; n takes 16 and 24, k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17860
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_strided_cn) {
  // Full m=5 tile; n takes 16 and 24, k takes 1, 6, 11, 16, with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17878
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 6, 11, 16, and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17898
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, small_kernel) {
  // Full 5x8 tile with ks(3); k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
17914
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, small_kernel_subtile) {
  // ks(3) with k in 1, 6, 11, 16, n sweeping 1..8 and m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17935
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_small_kernel) {
  // Full m=5 tile with ks(3); n sweeps 9..15 and k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17953
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_small_kernel) {
  // Full m=5 tile with ks(3); n takes 16 and 24, k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
17971
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cm_subtile) {
  // cm_stride(11) with k in 1, 6, 11, 16, n sweeping 1..8 and m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
17992
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, a_offset) {
  // Full 5x8 tile with ks(3) and a_offset(103); k takes 1, 6, 11, 16.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
  }
}
18009
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, zero) {
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..4.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_size)
        .ks(3)
        .a_offset(103)
        .zero_index(zi)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18029
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, qmin) {
  // Single full 5x8 tile at k=4 with qmin(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
18043
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, qmax) {
  // Single full 5x8 tile at k=4 with qmax(128).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
18057
TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cm) {
  // Single full 5x8 tile at k=4 with cm_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
}
18071 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18072
18073
18074 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1) {
  // Single full 5x8 tile at k=1.
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
18087
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cn) {
  // Single full 5x8 tile at k=1 with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
18101
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile) {
  // k=1 with n sweeping 1..8 and m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18119
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_m) {
  // k=1, n=8, with m sweeping 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
18135
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_n) {
  // k=1, m=5, with n sweeping 1..8; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
18151
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1) {
  // Full 5x8 tile with k sweeping 2..9.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
18166
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1_subtile) {
  // k sweeps 2..9, n sweeps 1..8 and m sweeps 1..5; one iteration per case.
  TEST_REQUIRES_X86_SSE;
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18186
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8) {
  // Full m=5 tile; n sweeps 9..15 and k takes 1, 3, 5.
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18203
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_cn) {
  // Full m=5 tile; n sweeps 9..15 and k takes 1, 3, 5, with cn_stride(11).
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18221
// n in 9..15, k in {1, 3, 5}, and m in 1..5; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18241
// n takes the multiples of 8 in {16, 24} with m = 5 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18258
// n in {16, 24} and k in {1, 3, 5} with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18276
// n in {16, 24}, k in {1, 3, 5}, and m in 1..5; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18296
// Full 5x8 tile with ks set to 3 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
18312
// ks = 3 with k in {1, 3, 5}, n in 1..8, and m in 1..5; every run uses
// iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18333
// ks = 3 with n in 9..15, m = 5, and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18351
// ks = 3 with n in {16, 24}, m = 5, and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18369
// cm_stride = 11 with k in {1, 3, 5}, n in 1..8, and m in 1..5; every run
// uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18390
// Full 5x8 tile with ks = 3 and a_offset = 29, for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, a_offset) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
  }
}
18407
// For k in {1, 3, 5}, tries each zero_index value 0..4 with ks = 3 and
// a_offset = 29 on the full 5x8 tile.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, zero) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(kc)
          .ks(3)
          .a_offset(29)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
    }
  }
}
18427
// Single full 5x8 tile at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
18441
// Single full 5x8 tile at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
18455
// Single full 5x8 tile at k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
}
18469 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18470
18471
18472 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 5x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18485
// Single full 5x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18499
// k = 4 over every (n, m) pair with n in 1..8 and m in 1..5; every run uses
// iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18517
// Keeps n = 8 and k = 4 while m walks 1..5; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18533
// Keeps m = 5 and k = 4 while n walks 1..8; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18549
// Full 5x8 tile with k taking the values 1, 2, and 3.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18564
// For k in 1..3, covers every (n, m) pair with n in 1..8 and m in 1..5;
// every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18584
// Full 5x8 tile with k taking the values 5, 6, and 7.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18599
// For k in 5..7, covers every (n, m) pair with n in 1..8 and m in 1..5;
// every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18619
// Full 5x8 tile with k stepping by 4 from 8 through 40.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18634
// For k stepping by 4 from 8 through 40, covers every (n, m) pair with n in
// 1..8 and m in 1..5; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18654
// n ranges over 9..15 (above nr = 8) with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18671
// n in 9..15 and k in {1, 6, 11, 16} with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18689
// n in 9..15, k in {1, 6, 11, 16}, and m in 1..5; every run uses
// iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18709
// n takes the multiples of 8 in {16, 24} with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18726
// n in {16, 24} and k in {1, 6, 11, 16} with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18744
// n in {16, 24}, k in {1, 6, 11, 16}, and m in 1..5; every run uses
// iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18764
// Full 5x8 tile with ks set to 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18780
// ks = 3 with k in {1, 6, 11, 16}, n in 1..8, and m in 1..5; every run uses
// iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18801
// ks = 3 with n in 9..15, m = 5, and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18819
// ks = 3 with n in {16, 24}, m = 5, and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18837
// cm_stride = 11 with k in {1, 6, 11, 16}, n in 1..8, and m in 1..5; every
// run uses iterations(1).
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
      }
    }
  }
}
18858
// Full 5x8 tile with ks = 3 and a_offset = 103, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
  }
}
18875
// For k in {1, 6, 11, 16}, tries each zero_index value 0..4 with ks = 3 and
// a_offset = 103 on the full 5x8 tile.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(kc)
          .ks(3)
          .a_offset(103)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
    }
  }
}
18895
// Single full 5x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18909
// Single full 5x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18923
// Single full 5x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
}
18937 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18938
18939
18940 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 1x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
18953
// Single full 1x8 tile at k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
18967
// k = 1 with n in 1..8; the m loop has the single value 1 because mr = 1.
// Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
18985
// n = 8 and k = 1; the m loop has the single value 1 because mr = 1.
// The run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19001
// Keeps m = 1 and k = 1 while n walks 1..8; every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19017
// Full 1x8 tile with k taking every value from 2 through 9.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19032
// For k in 2..9 and n in 1..8; the m loop has the single value 1 because
// mr = 1. Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19052
// n ranges over 9..15 (above nr = 8) with m = 1 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19069
// n in 9..15 and k in {1, 3, 5} with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19087
// n in 9..15 and k in {1, 3, 5}; the m loop has the single value 1 because
// mr = 1. Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19107
// n takes the multiples of 8 in {16, 24} with m = 1 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19124
// n in {16, 24} and k in {1, 3, 5} with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19142
// n in {16, 24} and k in {1, 3, 5}; the m loop has the single value 1
// because mr = 1. Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19162
// Full 1x8 tile with ks set to 3 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19178
// ks = 3 with k in {1, 3, 5} and n in 1..8; the m loop has the single value
// 1 because mr = 1. Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19199
// ks = 3 with n in 9..15, m = 1, and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19217
// ks = 3 with n in {16, 24}, m = 1, and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19235
// cm_stride = 11 with k in {1, 3, 5} and n in 1..8; the m loop has the
// single value 1 because mr = 1. Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19256
// Full 1x8 tile with ks = 3 and a_offset = 7, for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19273
// For k in {1, 3, 5}, runs with ks = 3 and a_offset = 7; the zero_index loop
// has the single value 0 because mr = 1.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19293
// Single full 1x8 tile at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
19307
// Single full 1x8 tile at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
19321
// Single full 1x8 tile at k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
19335 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19336
19337
19338 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 1x16 tile at k = 1.
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
19351
// Single full 1x16 tile at k = 1 with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
19365
// k = 1 with n in 1..16; the m loop has the single value 1 because mr = 1.
// Every run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
19383
// n = 16 and k = 1; the m loop has the single value 1 because mr = 1.
// The run uses iterations(1).
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
19399
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  // Sweeps n over [1, 16] with m = 1 and k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19415
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_gt_1) {
  // Full tile (m = 1, n = 16) for every k in [2, 9].
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19430
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_gt_1_subtile) {
  // k in [2, 9], n in [1, 16], m in [1, 1]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19450
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16) {
  // n in [17, 31] (above nr = 16) with m = 1 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19467
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  // n in [17, 31], m = 1, k in {1, 3, 5}, with an explicit cn_stride of 19.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19485
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_subtile) {
  // n in [17, 31], k in {1, 3, 5}, m in [1, 1]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19505
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16) {
  // n in {32, 48} (multiples of nr = 16) with m = 1 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19522
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_cn) {
  // n in {32, 48}, m = 1, k in {1, 3, 5}, with an explicit cn_stride of 19.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19540
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_subtile) {
  // n in {32, 48}, k in {1, 3, 5}, m in [1, 1]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19560
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, small_kernel) {
  // Full tile (m = 1, n = 16) with ks = 3 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19576
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, small_kernel_subtile) {
  // ks = 3 with k in {1, 3, 5}, n in [1, 16], m in [1, 1]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19597
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_small_kernel) {
  // ks = 3 with n in [17, 31], m = 1, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19615
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_small_kernel) {
  // ks = 3 with n in {32, 48}, m = 1, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19633
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cm_subtile) {
  // cm_stride = 19 with k in {1, 3, 5}, n in [1, 16], m in [1, 1]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19654
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, a_offset) {
  // Full tile (m = 1, n = 16) with ks = 3 and a_offset = 7 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19671
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, zero) {
  // ks = 3, a_offset = 7; zero_index takes each value in [0, 1) for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_dim)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19691
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, qmin) {
  // Full tile (m = 1, n = 16) at k = 1 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
19705
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, qmax) {
  // Full tile (m = 1, n = 16) at k = 1 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
19719
TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cm) {
  // Full tile (m = 1, n = 16) at k = 1 with an explicit cm_stride of 19.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
19733 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19734
19735
19736 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1) {
  // Full tile (m = mr = 4, n = nr = 16) at k = 1.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
19749
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cn) {
  // Full tile (m = 4, n = 16) at k = 1 with an explicit cn_stride of 19.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
19763
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile) {
  // Every (n, m) pair with n in [1, 16] and m in [1, 4] at k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19781
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  // Sweeps m over [1, 4] with n = 16 and k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19797
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  // Sweeps n over [1, 16] with m = 4 and k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19813
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_gt_1) {
  // Full tile (m = 4, n = 16) for every k in [2, 9].
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19828
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_gt_1_subtile) {
  // k in [2, 9], n in [1, 16], m in [1, 4]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19848
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16) {
  // n in [17, 31] (above nr = 16) with m = 4 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19865
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  // n in [17, 31], m = 4, k in {1, 3, 5}, with an explicit cn_stride of 19.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19883
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_subtile) {
  // n in [17, 31], k in {1, 3, 5}, m in [1, 4]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19903
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16) {
  // n in {32, 48} (multiples of nr = 16) with m = 4 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19920
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_cn) {
  // n in {32, 48}, m = 4, k in {1, 3, 5}, with an explicit cn_stride of 19.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
19938
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_subtile) {
  // n in {32, 48}, k in {1, 3, 5}, m in [1, 4]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19958
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, small_kernel) {
  // Full tile (m = 4, n = 16) with ks = 3 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
19974
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, small_kernel_subtile) {
  // ks = 3 with k in {1, 3, 5}, n in [1, 16], m in [1, 4]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
19995
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_small_kernel) {
  // ks = 3 with n in [17, 31], m = 4, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20013
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_small_kernel) {
  // ks = 3 with n in {32, 48}, m = 4, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20031
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cm_subtile) {
  // cm_stride = 19 with k in {1, 3, 5}, n in [1, 16], m in [1, 4]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20052
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, a_offset) {
  // Full tile (m = 4, n = 16) with ks = 3 and a_offset = 23 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20069
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, zero) {
  // ks = 3, a_offset = 23; zero_index takes each value in [0, 4) for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k_dim)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20089
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, qmin) {
  // Full tile (m = 4, n = 16) at k = 1 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20103
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, qmax) {
  // Full tile (m = 4, n = 16) at k = 1 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20117
TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cm) {
  // Full tile (m = 4, n = 16) at k = 1 with an explicit cm_stride of 19.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20131 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20132
20133
20134 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1) {
  // Full tile (m = mr = 5, n = nr = 8) at k = 1.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20147
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cn) {
  // Full tile (m = 5, n = 8) at k = 1 with an explicit cn_stride of 11.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20161
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile) {
  // Every (n, m) pair with n in [1, 8] and m in [1, 5] at k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20179
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  // Sweeps m over [1, 5] with n = 8 and k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20195
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  // Sweeps n over [1, 8] with m = 5 and k = 1, iterations(1).
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20211
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1) {
  // Full tile (m = 5, n = 8) for every k in [2, 9].
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20226
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1_subtile) {
  // k in [2, 9], n in [1, 8], m in [1, 5]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20246
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8) {
  // n in [9, 15] (above nr = 8) with m = 5 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20263
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  // n in [9, 15], m = 5, k in {1, 3, 5}, with an explicit cn_stride of 11.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20281
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_subtile) {
  // n in [9, 15], k in {1, 3, 5}, m in [1, 5]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20301
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8) {
  // n in {16, 24} (multiples of nr = 8) with m = 5 and k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20318
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_cn) {
  // n in {16, 24}, m = 5, k in {1, 3, 5}, with an explicit cn_stride of 11.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20336
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 3, 5}, m in [1, 5]; iterations(1) per configuration.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20356
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, small_kernel) {
  // Full tile (m = 5, n = 8) with ks = 3 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20372
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, small_kernel_subtile) {
  // ks = 3 with k in {1, 3, 5}, n in [1, 8], m in [1, 5]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20393
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_small_kernel) {
  // ks = 3 with n in [9, 15], m = 5, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20411
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_small_kernel) {
  // ks = 3 with n in {16, 24}, m = 5, k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20429
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm_subtile) {
  // cm_stride = 11 with k in {1, 3, 5}, n in [1, 8], m in [1, 5]; iterations(1) each.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                  xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20450
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, a_offset) {
  // Full tile (m = 5, n = 8) with ks = 3 and a_offset = 29 for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_dim)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
              xnn_init_f32_minmax_avx_params);
  }
}
20467
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, zero) {
  // ks = 3, a_offset = 29; zero_index takes each value in [0, 5) for k in {1, 3, 5}.
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(k_dim)
          .ks(3)
          .a_offset(29)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
                xnn_init_f32_minmax_avx_params);
    }
  }
}
20487
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, qmin) {
  // Full tile (m = 5, n = 8) at k = 1 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20501
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, qmax) {
  // Full tile (m = 5, n = 8) at k = 1 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20515
TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm) {
  // Full tile (m = 5, n = 8) at k = 1 with an explicit cm_stride of 11.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20529 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20530
20531
20532 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1) {
  // Full tile (m = mr = 5, n = nr = 16) at k = 1.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast,
            xnn_init_f32_minmax_avx_params);
}
20545
// Full 5x16 tile at k = 1 with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1);
  tester.cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20559
// Every m in [1, 5] and n in [1, 16] at k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20577
// Every m in [1, 5] with n = 16 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(mc).n(16).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20593
// Every n in [1, 16] with m = 5 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(nc).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20609
// Full 5x16 tile for each k in [2, 9].
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20624
// Every m in [1, 5] and n in [1, 16] for each k in [2, 9], one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20644
// n in [17, 31] and k in {1, 3, 5} with m = 5.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20661
// n in [17, 31] and k in {1, 3, 5} with m = 5 and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20679
// n in [17, 31], k in {1, 3, 5} and every m in [1, 5], one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20699
// n in {32, 48} and k in {1, 3, 5} with m = 5.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20716
// n in {32, 48} and k in {1, 3, 5} with m = 5 and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20734
// n in {32, 48}, k in {1, 3, 5} and every m in [1, 5], one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20754
// Full 5x16 tile with ks = 3 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20770
// ks = 3 with k in {1, 3, 5}, every n in [1, 16] and m in [1, 5], one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20791
// n in [17, 31] and k in {1, 3, 5} with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20809
// n in {32, 48} and k in {1, 3, 5} with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20827
// cm_stride = 19 with k in {1, 3, 5}, every n in [1, 16] and m in [1, 5], one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
20848
// Full 5x16 tile with ks = 3 and a_offset = 29 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(kc);
    tester.ks(3).a_offset(29);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20865
// Full 5x16 tile with ks = 3 and a_offset = 29, for k in {1, 3, 5} and
// zero_index values 0 through 4.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(16).k(kc);
      tester.ks(3).a_offset(29).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20885
// Full 5x16 tile at k = 1 with qmin = 128.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20899
// Full 5x16 tile at k = 1 with qmax = 128.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20913
// Full 5x16 tile at k = 1 with cm_stride = 19.
TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1);
  tester.cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20927 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20928
20929
20930 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 6x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20943
// Full 6x8 tile at k = 1 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
20957
// Every m in [1, 6] and n in [1, 8] at k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
20975
// Every m in [1, 6] with n = 8 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
20991
// Every n in [1, 8] with m = 6 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nc).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21007
// Full 6x8 tile for each k in [2, 9].
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21022
// Every m in [1, 6] and n in [1, 8] for each k in [2, 9], one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21042
// n in [9, 15] and k in {1, 3, 5} with m = 6.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21059
// n in [9, 15] and k in {1, 3, 5} with m = 6 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21077
// n in [9, 15], k in {1, 3, 5} and every m in [1, 6], one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21097
// n in {16, 24} and k in {1, 3, 5} with m = 6.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21114
// n in {16, 24} and k in {1, 3, 5} with m = 6 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21132
// n in {16, 24}, k in {1, 3, 5} and every m in [1, 6], one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21152
// Full 6x8 tile with ks = 3 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21168
// ks = 3 with k in {1, 3, 5}, every n in [1, 8] and m in [1, 6], one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21189
// n in [9, 15] and k in {1, 3, 5} with m = 6 and ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21207
// n in {16, 24} and k in {1, 3, 5} with m = 6 and ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21225
// cm_stride = 11 with k in {1, 3, 5}, every n in [1, 8] and m in [1, 6], one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21246
// Full 6x8 tile with ks = 3 and a_offset = 37 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3).a_offset(37);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21263
// Full 6x8 tile with ks = 3 and a_offset = 37, for k in {1, 3, 5} and
// zero_index values 0 through 5.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(kc);
      tester.ks(3).a_offset(37).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21283
// Full 6x8 tile at k = 1 with qmin = 128.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21297
// Full 6x8 tile at k = 1 with qmax = 128.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21311
// Full 6x8 tile at k = 1 with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
}
21325 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21326
21327
21328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x16 tile at k = 1.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21341
// Full 4x16 tile at k = 1 with cn_stride = 19.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(1);
  tester.cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21355
// Every m in [1, 4] and n in [1, 16] at k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(mc).n(nc).k(1);
      tester.iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21373
// Every m in [1, 4] with n = 16 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(mc).n(16).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21389
// Every n in [1, 16] with m = 4 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(nc).k(1);
    tester.iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21405
// Full 4x16 tile for each k in [2, 9].
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(kc);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21420
// Every m in [1, 4] and n in [1, 16] for each k in [2, 9], one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21440
// n in [17, 31] and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21457
// n in [17, 31] and k in {1, 3, 5} with m = 4 and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21475
// n in [17, 31], k in {1, 3, 5} and every m in [1, 4], one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21495
// n in {32, 48} and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21512
// n in {32, 48} and k in {1, 3, 5} with m = 4 and cn_stride = 19.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21530
// n in {32, 48}, k in {1, 3, 5} and every m in [1, 4], one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21550
// Full 4x16 tile with ks = 3 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(kc);
    tester.ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21566
// ks = 3 with k in {1, 3, 5}, every n in [1, 16] and m in [1, 4], one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21587
// n in [17, 31] and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21605
// n in {32, 48} and k in {1, 3, 5} with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21623
// cm_stride = 19 with k in {1, 3, 5}, every n in [1, 16] and m in [1, 4], one iteration per shape.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21644
// Full 4x16 tile with ks = 3 and a_offset = 23 for k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(kc);
    tester.ks(3).a_offset(23);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21661
// Full 4x16 tile with ks = 3 and a_offset = 23, for k in {1, 3, 5} and
// zero_index values 0 through 3.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(16).k(kc);
      tester.ks(3).a_offset(23).zero_index(zi);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21681
// Full 4x16 tile at k = 1 with qmin = 128.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(1);
  tester.qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21695
// Full 4x16 tile at k = 1 with qmax = 128.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(1);
  tester.qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21709
// Full tile (m = 4, n = 16) with k = 1 and cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(1).cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21723 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21724
21725
21726 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full tile (m = 5, n = 16) with k = 1.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21739
// Full tile (m = 5, n = 16) with k = 1 and cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1).cn_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
21753
// Sweeps n over 1..16 and m over 1..5 with k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21771
// Sweeps m over 1..5 at n = 16 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(m_val).n(16).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21787
// Sweeps n over 1..16 at m = 5 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(n_val).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21803
// Full tile (m = 5, n = 16) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(k_val);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21818
// Sweeps k over 2..9, n over 1..16 and m over 1..5, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21838
// Sweeps n over 17..31 and k over {1, 3, 5} at m = 5.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21855
// Sweeps n over 17..31 and k over {1, 3, 5} at m = 5 with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21873
// Sweeps n over 17..31, k over {1, 3, 5} and m over 1..5, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21893
// Sweeps n over {32, 48} and k over {1, 3, 5} at m = 5.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21910
// Sweeps n over {32, 48} and k over {1, 3, 5} at m = 5 with cn_stride set to 19.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val).cn_stride(19);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
21928
// Sweeps n over {32, 48}, k over {1, 3, 5} and m over 1..5, one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21948
// Full tile (m = 5, n = 16) with ks = 3, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(k_val).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
21964
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..5 with ks = 3,
// one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
21985
// Sweeps n over 17..31 and k over {1, 3, 5} at m = 5 with ks = 3.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22003
// Sweeps n over {32, 48} and k over {1, 3, 5} at m = 5 with ks = 3.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22021
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..5 with cm_stride set to 19,
// running one iteration per shape.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(19).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22042
// Full tile (m = 5, n = 16) with ks = 3 and a_offset = 29, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(1).sr(1);
    tester.m(5).n(16).k(k_val).ks(3).a_offset(29);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22059
// Full tile (m = 5, n = 16) with ks = 3 and a_offset = 29; for each k in {1, 3, 5}
// zero_index is swept over 0..4.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t z_idx = 0; z_idx < 5; ++z_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(1).sr(1);
      tester.m(5).n(16).k(k_val).ks(3).a_offset(29).zero_index(z_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22079
// Full tile (m = 5, n = 16) with k = 1 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22093
// Full tile (m = 5, n = 16) with k = 1 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22107
// Full tile (m = 5, n = 16) with k = 1 and cm_stride set to 19.
TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(1).sr(1);
  tester.m(5).n(16).k(1).cm_stride(19);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22121 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22122
22123
22124 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full tile (m = 6, n = 8) with k = 1.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22137
// Full tile (m = 6, n = 8) with k = 1 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22151
// Sweeps n over 1..8 and m over 1..6 with k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22169
// Sweeps m over 1..6 at n = 8 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22185
// Sweeps n over 1..8 at m = 6 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(n_val).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22201
// Full tile (m = 6, n = 8) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_val);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22216
// Sweeps k over 2..9, n over 1..8 and m over 1..6, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22236
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22253
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 6 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22271
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..6, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22291
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 6.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22308
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 6 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22326
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..6, one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22346
// Full tile (m = 6, n = 8) with ks = 3, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_val).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22362
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..6 with ks = 3,
// one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22383
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22401
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 6 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22419
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..6 with cm_stride set to 11,
// running one iteration per shape.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22440
// Full tile (m = 6, n = 8) with ks = 3 and a_offset = 37, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_val).ks(3).a_offset(37);
    tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22457
// Full tile (m = 6, n = 8) with ks = 3 and a_offset = 37; for each k in {1, 3, 5}
// zero_index is swept over 0..5.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t z_idx = 0; z_idx < 6; ++z_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k_val).ks(3).a_offset(37).zero_index(z_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22477
// Full tile (m = 6, n = 8) with k = 1 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22491
// Full tile (m = 6, n = 8) with k = 1 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22505
// Full tile (m = 6, n = 8) with k = 1 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22519 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22520
22521
22522 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on the full tile (m = 7, n = 8) with k = 1.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22535
// Full tile (m = 7, n = 8) with k = 1 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester tester{};
  tester.mr(7).nr(8).kr(1).sr(1);
  tester.m(7).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22549
// Sweeps n over 1..8 and m over 1..7 with k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22567
// Sweeps m over 1..7 at n = 8 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22583
// Sweeps n over 1..8 at m = 7 and k = 1, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(8).kr(1).sr(1);
    tester.m(7).n(n_val).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22599
// Full tile (m = 7, n = 8) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(8).kr(1).sr(1);
    tester.m(7).n(8).k(k_val);
    tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22614
// Sweeps k over 2..9, n over 1..8 and m over 1..7, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 2; k_val <= 9; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22634
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 7.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22651
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 7 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22669
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..7, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22689
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 7.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22706
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 7 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22724
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..7, one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22744
// Full tile (m = 7, n = 8) with ks = 3, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(8).kr(1).sr(1);
    tester.m(7).n(8).k(k_val).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22760
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..7 with ks = 3,
// one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22781
// Sweeps n over 9..15 and k over {1, 3, 5} at m = 7 with ks = 3.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22799
// Sweeps n over {16, 24} and k over {1, 3, 5} at m = 7 with ks = 3.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(7).nr(8).kr(1).sr(1);
      tester.m(7).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22817
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..7 with cm_stride set to 11,
// running one iteration per shape.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 7; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(7).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
22838
// Full tile (m = 7, n = 8) with ks = 3 and a_offset = 37, k swept over {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(7).nr(8).kr(1).sr(1);
    tester.m(7).n(8).k(k_val).ks(3).a_offset(37);
    tester.Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22855
// 7x8 FMA3 broadcast IGEMM: full tile, k in {1, 3, 5}, with ks(3), a_offset(37),
// and zero_index(mz) for every mz in 0..6.
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 7; mz++) {
      GemmMicrokernelTester()
        .mr(7)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(7)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(37)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22875
// 7x8 FMA3 broadcast IGEMM: full tile (m = 7, n = 8), k = 1, with qmin(128).
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(7)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(7)
    .n(8)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22889
// 7x8 FMA3 broadcast IGEMM: full tile (m = 7, n = 8), k = 1, with qmax(128).
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(7)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(7)
    .n(8)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22903
// 7x8 FMA3 broadcast IGEMM: full tile (m = 7, n = 8), k = 1, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(7)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(7)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22917 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22918
22919
22920 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8) with k = 1.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(8)
    .n(8)
    .k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22933
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k = 1, with cn_stride(11).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(8)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
22947
// 8x8 FMA3 broadcast IGEMM: k = 1 over every (m, n) with m in 1..8 and n in 1..8;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 8; m++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
22965
// 8x8 FMA3 broadcast IGEMM: k = 1, n fixed at 8, m swept over 1..8; a single iteration each.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t m = 1; m <= 8; m++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22981
// 8x8 FMA3 broadcast IGEMM: k = 1, m fixed at 8, n swept over 1..8; a single iteration each.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
22997
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23012
// 8x8 FMA3 broadcast IGEMM: k in 2..9 crossed with every m in 1..8 and n in 1..8;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23032
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in 9..15 (above nr = 8) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23049
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in 9..15, k in {1, 3, 5}, and cn_stride(11).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23067
// 8x8 FMA3 broadcast IGEMM: n in 9..15, k in {1, 3, 5}, m swept over 1..8;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23087
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in {16, 24} (multiples of nr = 8) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23104
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in {16, 24}, k in {1, 3, 5}, and cn_stride(11).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23122
// 8x8 FMA3 broadcast IGEMM: n in {16, 24}, k in {1, 3, 5}, m swept over 1..8;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23142
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23158
// 8x8 FMA3 broadcast IGEMM: ks(3) with k in {1, 3, 5} over every m in 1..8 and n in 1..8;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23179
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in 9..15, k in {1, 3, 5}, and ks(3).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23197
// 8x8 FMA3 broadcast IGEMM: m = 8 with n in {16, 24}, k in {1, 3, 5}, and ks(3).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23215
// 8x8 FMA3 broadcast IGEMM: every (m, n) with m in 1..8 and n in 1..8, k in {1, 3, 5},
// with cm_stride(11); a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }
}
23236
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k in {1, 3, 5}, with ks(3) and a_offset(43).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
}
23253
// 8x8 FMA3 broadcast IGEMM: full tile, k in {1, 3, 5}, with ks(3), a_offset(43),
// and zero_index(mz) for every mz in 0..7.
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 8; mz++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }
}
23273
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k = 1, with qmin(128).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, qmin) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(8)
    .n(8)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23287
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k = 1, with qmax(128).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, qmax) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(8)
    .n(8)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23301
// 8x8 FMA3 broadcast IGEMM: full tile (m = 8, n = 8), k = 1, with cm_stride(11).
TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_FMA3;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(8)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
}
23315 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23316
23317
23318 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16) with k = 1.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23331
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k = 1, with cn_stride(19).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(1)
    .cn_stride(19)
    .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23345
// 4x16 AVX512F broadcast IGEMM: k = 1 over every (m, n) with m in 1..4 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23363
// 4x16 AVX512F broadcast IGEMM: k = 1, n fixed at 16, m swept over 1..4; a single iteration each.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23379
// 4x16 AVX512F broadcast IGEMM: k = 1, m fixed at 4, n swept over 1..16; a single iteration each.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23395
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23410
// 4x16 AVX512F broadcast IGEMM: k in 2..9 crossed with every m in 1..4 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23430
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in 17..31 (above nr = 16) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23447
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in 17..31, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23465
// 4x16 AVX512F broadcast IGEMM: n in 17..31, k in {1, 3, 5}, m swept over 1..4;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23485
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in {32, 48} (multiples of nr = 16) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23502
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in {32, 48}, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23520
// 4x16 AVX512F broadcast IGEMM: n in {32, 48}, k in {1, 3, 5}, m swept over 1..4;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23540
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23556
// 4x16 AVX512F broadcast IGEMM: ks(3) with k in {1, 3, 5} over every m in 1..4 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23577
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in 17..31, k in {1, 3, 5}, and ks(3).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23595
// 4x16 AVX512F broadcast IGEMM: m = 4 with n in {32, 48}, k in {1, 3, 5}, and ks(3).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23613
// 4x16 AVX512F broadcast IGEMM: every (m, n) with m in 1..4 and n in 1..16, k in {1, 3, 5},
// with cm_stride(19); a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23634
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k in {1, 3, 5}, with ks(3) and a_offset(23).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23651
// 4x16 AVX512F broadcast IGEMM: full tile, k in {1, 3, 5}, with ks(3), a_offset(23),
// and zero_index(mz) for every mz in 0..3.
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23671
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k = 1, with qmin(128).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23685
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k = 1, with qmax(128).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23699
// 4x16 AVX512F broadcast IGEMM: full tile (m = 4, n = 16), k = 1, with cm_stride(19).
TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(1)
    .cm_stride(19)
    .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23713 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23714
23715
23716 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 5x16 AVX512F broadcast IGEMM: full tile (m = 5, n = 16) with k = 1.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(5)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(5)
    .n(16)
    .k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23729
// 5x16 AVX512F broadcast IGEMM: full tile (m = 5, n = 16), k = 1, with cn_stride(19).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  GemmMicrokernelTester()
    .mr(5)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(5)
    .n(16)
    .k(1)
    .cn_stride(19)
    .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
23743
// 5x16 AVX512F broadcast IGEMM: k = 1 over every (m, n) with m in 1..5 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23761
// 5x16 AVX512F broadcast IGEMM: k = 1, n fixed at 16, m swept over 1..5; a single iteration each.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t m = 1; m <= 5; m++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23777
// 5x16 AVX512F broadcast IGEMM: k = 1, m fixed at 5, n swept over 1..16; a single iteration each.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(5)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23793
// 5x16 AVX512F broadcast IGEMM: full tile (m = 5, n = 16) with k swept over 2..9.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(5)
      .n(16)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23808
// 5x16 AVX512F broadcast IGEMM: k in 2..9 crossed with every m in 1..5 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23828
// 5x16 AVX512F broadcast IGEMM: m = 5 with n in 17..31 (above nr = 16) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23845
// 5x16 AVX512F broadcast IGEMM: m = 5 with n in 17..31, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23863
// 5x16 AVX512F broadcast IGEMM: n in 17..31, k in {1, 3, 5}, m swept over 1..5;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23883
// 5x16 AVX512F broadcast IGEMM: m = 5 with n in {32, 48} (multiples of nr = 16) and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23900
// 5x16 AVX512F broadcast IGEMM: m = 5 with n in {32, 48}, k in {1, 3, 5}, and cn_stride(19).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23918
// 5x16 AVX512F broadcast IGEMM: n in {32, 48}, k in {1, 3, 5}, m swept over 1..5;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23938
// 5x16 AVX512F broadcast IGEMM: full tile (m = 5, n = 16), k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(5)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
23954
// 5x16 AVX512F broadcast IGEMM: ks(3) with k in {1, 3, 5} over every m in 1..5 and n in 1..16;
// a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
23975
// Sweeps n over 17..31 and k over {1, 3, 5} with m(5) and ks(3).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
23993
// Sweeps n over {32, 48} and k over {1, 3, 5} with m(5) and ks(3).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512F;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
24011
// Sweeps k over {1, 3, 5}, n over 1..16 and m over 1..5 with cm_stride(19), one iteration each.
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
24032
// Sweeps k over {1, 3, 5} on the 5x16 tile with ks(3) and a_offset(29).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, a_offset) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(16).kr(1).sr(1)
        .m(5).n(16).k(kc)
        .ks(3).a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
  }
}
24049
// Sweeps k over {1, 3, 5} and zero_index over 0..4 with ks(3) and a_offset(29).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, zero) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(16).kr(1).sr(1)
          .m(5).n(16).k(kc)
          .ks(3).a_offset(29).zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
    }
  }
}
24069
// Single 5x16 run at k(1) with qmin(128).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, qmin) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
24083
// Single 5x16 run at k(1) with qmax(128).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, qmax) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
24097
// Single 5x16 run at k(1) with cm_stride(19).
TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cm) {
  TEST_REQUIRES_X86_AVX512F;
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(16).kr(1).sr(1)
      .m(5).n(16).k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
}
24111 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24112
24113
24114 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x8 run at k(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24126
// Single 1x8 run at k(1) with cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24139
// Sweeps n over 1..8 and m over 1..1 at k(1), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24156
// Sweeps m over 1..1 with n(8) and k(1), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24171
// Sweeps n over 1..8 with m(1) and k(1), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24186
// Sweeps k over 2..9 on the 1x8 tile.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24200
// Sweeps k over 2..9, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24219
// Sweeps n over 9..15 and k over {1, 3, 5} with m(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24235
// Sweeps n over 9..15 and k over {1, 3, 5} with m(1) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24252
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24271
// Sweeps n over {16, 24} and k over {1, 3, 5} with m(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24287
// Sweeps n over {16, 24} and k over {1, 3, 5} with m(1) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24304
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24323
// Sweeps k over {1, 3, 5} on the 1x8 tile with ks(3).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24338
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24358
// Sweeps n over 9..15 and k over {1, 3, 5} with m(1) and ks(3).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24375
// Sweeps n over {16, 24} and k over {1, 3, 5} with m(1) and ks(3).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24392
// Sweeps k over {1, 3, 5}, n over 1..8 and m over 1..1 with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24412
// Sweeps k over {1, 3, 5} on the 1x8 tile with ks(3) and a_offset(7).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3).a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24428
// Sweeps k over {1, 3, 5} and zero_index over 0..0 with ks(3) and a_offset(7).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(kc)
          .ks(3).a_offset(7).zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24447
// Single 1x8 run at k(1) with qmin(128).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24460
// Single 1x8 run at k(1) with qmax(128).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24473
// Single 1x8 run at k(1) with cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
24486 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24487
24488
24489 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x8 run with sr(4) at k(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
24501
// Single 1x8 run with sr(4) at k(4) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
24514
// Sweeps n over 1..8 and m over 1..1 at k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24531
// Sweeps m over 1..1 with n(8) and k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24546
// Sweeps n over 1..8 with m(1) and k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24561
// Sweeps k over 1..3 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24575
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24594
// Sweeps k over 5..7 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24608
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24627
// Sweeps k over 8, 12, ..., 40 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24641
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24660
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m(1).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24676
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m(1) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24693
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24712
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m(1).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24728
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m(1) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24745
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24764
// Sweeps k over {1, 6, 11, 16} on the 1x8 tile with ks(3).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24779
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with ks(3), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24799
// Sweeps n over 9..15 and k over {1, 6, 11, 16} with m(1) and ks(3).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24816
// Sweeps n over {16, 24} and k over {1, 6, 11, 16} with m(1) and ks(3).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24833
// Sweeps k over {1, 6, 11, 16}, n over 1..8 and m over 1..1 with cm_stride(11), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
24853
// Sweeps k over {1, 6, 11, 16} on the 1x8 tile with ks(3) and a_offset(23).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3).a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24869
// Sweeps k over {1, 6, 11, 16} and zero_index over 0..0 with ks(3) and a_offset(23).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(1).n(8).k(kc)
          .ks(3).a_offset(23).zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24888
// Single 1x8 run with sr(4) at k(4) and qmin(128).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
24901
// Single 1x8 run with sr(4) at k(4) and qmax(128).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
24914
// Single 1x8 run with sr(4) at k(4) and cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
24927 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24928
24929
24930 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x8 run with sr(4) at k(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
24942
// Single 1x8 run with sr(4) at k(4) and cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
24955
// Sweeps n over 1..8 and m over 1..1 at k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
24972
// Sweeps m over 1..1 with n(8) and k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
24987
// Sweeps n over 1..8 with m(1) and k(4), one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25002
// Sweeps k over 1..3 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25016
// Sweeps k over 1..3, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25035
// Sweeps k over 5..7 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25049
// Sweeps k over 5..7, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25068
// Sweeps k over 8, 12, ..., 40 on the 1x8 tile with sr(4).
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25082
// Sweeps k over 8, 12, ..., 40, n over 1..8 and m over 1..1, one iteration each.
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25101
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8) {
  // n ranges over 9..15 (above nr = 8); k takes 1, 6, 11, 16; m is fixed at 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25117
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  // Same sweep as n in 9..15 and k in {1, 6, 11, 16}, with cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25134
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  // n ranges over 9..15, k takes 1, 6, 11, 16; the m loop only visits m == 1.
  // Every combination is tested with iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25153
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8) {
  // n takes 16 and 24 (multiples of nr = 8); k takes 1, 6, 11, 16; m is fixed at 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25169
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  // n takes 16 and 24, k takes 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25186
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 6, 11, 16; the m loop only visits m == 1.
  // Every combination is tested with iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25205
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, small_kernel) {
  // Full tile (m = 1, n = 8) with ks set to 3; k takes 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25220
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, small_kernel_subtile) {
  // ks is set to 3; k takes 1, 6, 11, 16 and n ranges over 1..8.
  // The m loop only visits m == 1; iterations is set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25240
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
  // ks is set to 3; n ranges over 9..15 and k takes 1, 6, 11, 16; m is fixed at 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25257
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
  // ks is set to 3; n takes 16 and 24 and k takes 1, 6, 11, 16; m is fixed at 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25274
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cm_subtile) {
  // cm_stride is set to 11; k takes 1, 6, 11, 16 and n ranges over 1..8.
  // The m loop only visits m == 1; iterations is set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25294
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, a_offset) {
  // Full tile (m = 1, n = 8) with ks = 3 and a_offset = 23; k takes 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25310
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, zero) {
  // Full tile with ks = 3 and a_offset = 23; k takes 1, 6, 11, 16.
  // zero_index is swept over [0, 1), i.e. only the value 0 for this kernel.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25329
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, qmin) {
  // Single run on the full tile (m = 1, n = 8, k = 4) with qmin set to 128.
  GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25342
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, qmax) {
  // Single run on the full tile (m = 1, n = 8, k = 4) with qmax set to 128.
  GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25355
TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cm) {
  // Single run on the full tile (m = 1, n = 8, k = 4) with cm_stride set to 11.
  GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
25368 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25369
25370
25371 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  // Single run on the full tile (m = 3, n = 8) with k = 1.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25383
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  // Single run on the full tile (m = 3, n = 8, k = 1) with cn_stride set to 11.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25396
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  // k is fixed at 1; n ranges over 1..8 and m over 1..3, iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25413
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  // k = 1 and n = 8 are fixed; m ranges over 1..3, iterations set to 1.
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25428
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  // k = 1 and m = 3 are fixed; n ranges over 1..8, iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25443
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  // Full tile (m = 3, n = 8) with k ranging over 2..9.
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25457
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  // k ranges over 2..9, n over 1..8 and m over 1..3; iterations set to 1.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25476
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  // n ranges over 9..15 (above nr = 8); k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25492
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  // n ranges over 9..15 and k takes 1, 3, 5, with cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25509
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  // n ranges over 9..15, k takes 1, 3, 5 and m ranges over 1..3; iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25528
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  // n takes 16 and 24 (multiples of nr = 8); k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25544
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  // n takes 16 and 24 and k takes 1, 3, 5, with cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25561
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 3, 5 and m ranges over 1..3; iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25580
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
  // Full tile (m = 3, n = 8) with ks set to 3; k takes 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25595
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
  // ks is set to 3; k takes 1, 3, 5, n ranges over 1..8 and m over 1..3.
  // iterations is set to 1 for every combination.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25615
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
  // ks is set to 3; n ranges over 9..15 and k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25632
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
  // ks is set to 3; n takes 16 and 24 and k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25649
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  // cm_stride is set to 11; k takes 1, 3, 5, n ranges over 1..8 and m over 1..3.
  // iterations is set to 1 for every combination.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25669
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
  // Full tile (m = 3, n = 8) with ks = 3 and a_offset = 17; k takes 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(17)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25685
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, zero) {
  // Full tile with ks = 3 and a_offset = 17; k takes 1, 3, 5.
  // zero_index is swept over 0, 1 and 2.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25704
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  // Single run on the full tile (m = 3, n = 8, k = 1) with qmin set to 128.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25717
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  // Single run on the full tile (m = 3, n = 8, k = 1) with qmax set to 128.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25730
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  // Single run on the full tile (m = 3, n = 8, k = 1) with cm_stride set to 11.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
25743 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25744
25745
25746 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  // Single run on the full tile (m = 3, n = 8) with k = 4.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25758
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  // Single run on the full tile (m = 3, n = 8, k = 4) with cn_stride set to 11.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
25771
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  // k is fixed at 4; n ranges over 1..8 and m over 1..3, iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25788
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  // k = 4 and n = 8 are fixed; m ranges over 1..3, iterations set to 1.
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25803
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  // k = 4 and m = 3 are fixed; n ranges over 1..8, iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25818
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  // Full tile (m = 3, n = 8) with k taking the values 1, 2 and 3.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25832
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  // k ranges over 1..3, n over 1..8 and m over 1..3; iterations set to 1.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25851
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  // Full tile (m = 3, n = 8) with k taking the values 5, 6 and 7.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25865
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  // k ranges over 5..7, n over 1..8 and m over 1..3; iterations set to 1.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25884
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  // Full tile (m = 3, n = 8) with k = 8, 12, ..., 40 (multiples of 4).
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
25898
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40; n ranges over 1..8 and m over 1..3; iterations set to 1.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25917
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  // n ranges over 9..15 (above nr = 8); k takes 1, 6, 11, 16; m is fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25933
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  // n ranges over 9..15 and k takes 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25950
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  // n ranges over 9..15, k takes 1, 6, 11, 16 and m ranges over 1..3; iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
25969
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  // n takes 16 and 24 (multiples of nr = 8); k takes 1, 6, 11, 16; m is fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
25985
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  // n takes 16 and 24 and k takes 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26002
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  // n takes 16 and 24, k takes 1, 6, 11, 16 and m ranges over 1..3; iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26021
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, small_kernel) {
  // Full tile (m = 3, n = 8) with ks set to 3; k takes 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26036
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
  // ks is set to 3; k takes 1, 6, 11, 16, n ranges over 1..8 and m over 1..3.
  // iterations is set to 1 for every combination.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26056
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
  // ks is set to 3; n ranges over 9..15 and k takes 1, 6, 11, 16; m is fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26073
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
  // ks is set to 3; n takes 16 and 24 and k takes 1, 6, 11, 16; m is fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26090
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  // cm_stride is set to 11; k takes 1, 6, 11, 16, n ranges over 1..8 and m over 1..3.
  // iterations is set to 1 for every combination.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26110
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, a_offset) {
  // Full tile (m = 3, n = 8) with ks = 3 and a_offset = 67; k takes 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
26126
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, zero) {
  // Full tile with ks = 3 and a_offset = 67; k takes 1, 6, 11, 16.
  // zero_index is swept over 0, 1 and 2.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26145
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmin) {
  // Single run on the full tile (m = 3, n = 8, k = 4) with qmin set to 128.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26158
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmax) {
  // Single run on the full tile (m = 3, n = 8, k = 4) with qmax set to 128.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26171
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  // Single run on the full tile (m = 3, n = 8, k = 4) with cm_stride set to 11.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26184 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26185
26186
26187 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  // Single run on the full tile (m = 3, n = 8) with k = 4.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26199
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cn) {
  // Single run on the full tile (m = 3, n = 8, k = 4) with cn_stride set to 11.
  GemmMicrokernelTester().mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
26212
// Every (m, n) pair with m in [1, 3] and n in [1, 8] at k = 4, using
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26229
// Sweeps m over [1, 3] with n = 8 and k = 4, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26244
// Sweeps n over [1, 8] with m = 3 and k = 4, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26259
// Full 3x8 tile for each k in [1, 3].
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26273
// For each k in [1, 3], every (m, n) pair with m in [1, 3] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26292
// Full 3x8 tile for each k in [5, 7].
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26306
// For each k in [5, 7], every (m, n) pair with m in [1, 3] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26325
// Full 3x8 tile for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26339
// For k = 8, 12, ..., 40, every (m, n) pair with m in [1, 3] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26358
// n in [9, 15] (above nr = 8) with m = 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26374
// n in [9, 15] with m = 3 and cn_stride set to 11, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26391
// n in [9, 15], k = 1, 6, 11, 16 and m in [1, 3], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26410
// n = 16 and 24 (multiples of nr = 8) with m = 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26426
// n = 16 and 24 with m = 3 and cn_stride set to 11, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26443
// n = 16 and 24, k = 1, 6, 11, 16 and m in [1, 3], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26462
// Full 3x8 tile with ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26477
// ks set to 3 for k = 1, 6, 11, 16 across every (m, n) pair with m in [1, 3]
// and n in [1, 8], using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26497
// n in [9, 15] with m = 3 and ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26514
// n = 16 and 24 with m = 3 and ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26531
// cm_stride set to 11 for k = 1, 6, 11, 16 across every (m, n) pair with
// m in [1, 3] and n in [1, 8], using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26551
// Full 3x8 tile with ks set to 3 and a_offset set to 67, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26567
// Full 3x8 tile with ks = 3 and a_offset = 67, sweeping zero_index over
// [0, 2] for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(kc)
          .ks(3)
          .a_offset(67)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26586
// Full 3x8 tile at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26599
// Full 3x8 tile at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26612
// Full 3x8 tile at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26625 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26626
26627
26628 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x8 tile (m == mr, n == nr) with k fixed at 1.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26640
// Full 4x8 tile at k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26653
// Every (m, n) pair with m in [1, 4] and n in [1, 8] at k = 1, using
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26670
// Sweeps m over [1, 4] with n = 8 and k = 1, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26685
// Sweeps n over [1, 8] with m = 4 and k = 1, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26700
// Full 4x8 tile for each k in [2, 9].
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26714
// For each k in [2, 9], every (m, n) pair with m in [1, 4] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26733
// n in [9, 15] (above nr = 8) with m = 4, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26749
// n in [9, 15] with m = 4 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26766
// n in [9, 15], k = 1, 3, 5 and m in [1, 4], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26785
// n = 16 and 24 (multiples of nr = 8) with m = 4, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26801
// n = 16 and 24 with m = 4 and cn_stride set to 11, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26818
// n = 16 and 24, k = 1, 3, 5 and m in [1, 4], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26837
// Full 4x8 tile with ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26852
// ks set to 3 for k = 1, 3, 5 across every (m, n) pair with m in [1, 4] and
// n in [1, 8], using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26872
// n in [9, 15] with m = 4 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26889
// n = 16 and 24 with m = 4 and ks set to 3, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26906
// cm_stride set to 11 for k = 1, 3, 5 across every (m, n) pair with
// m in [1, 4] and n in [1, 8], using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
26926
// Full 4x8 tile with ks set to 3 and a_offset set to 23, for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
26942
// Full 4x8 tile with ks = 3 and a_offset = 23, sweeping zero_index over
// [0, 3] for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(23)
          .zero_index(zi)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
26961
// Full 4x8 tile at k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26974
// Full 4x8 tile at k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
26987
// Full 4x8 tile at k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27000 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27001
27002
27003 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x8 tile (m == mr, n == nr) with k fixed at 4.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27015
// Full 4x8 tile at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
27028
// Every (m, n) pair with m in [1, 4] and n in [1, 8] at k = 4, using
// iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27045
// Sweeps m over [1, 4] with n = 8 and k = 4, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27060
// Sweeps n over [1, 8] with m = 4 and k = 4, using iterations(1) per value.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27075
// Full 4x8 tile for each k in [1, 3].
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27089
// For each k in [1, 3], every (m, n) pair with m in [1, 4] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27108
// Full 4x8 tile for each k in [5, 7].
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27122
// For each k in [5, 7], every (m, n) pair with m in [1, 4] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27141
// Full 4x8 tile for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27155
// For k = 8, 12, ..., 40, every (m, n) pair with m in [1, 4] and n in [1, 8],
// using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27174
// n in [9, 15] (above nr = 8) with m = 4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27190
// n in [9, 15] with m = 4 and cn_stride set to 11, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27207
// n in [9, 15], k = 1, 6, 11, 16 and m in [1, 4], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27226
// n = 16 and 24 (multiples of nr = 8) with m = 4, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27242
// n = 16 and 24 with m = 4 and cn_stride set to 11, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27259
// n = 16 and 24, k = 1, 6, 11, 16 and m in [1, 4], using iterations(1) per
// configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27278
// Full 4x8 tile with ks set to 3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
27293
// ks set to 3 for k = 1, 6, 11, 16 across every (m, n) pair with m in [1, 4]
// and n in [1, 8], using iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27313
// ks = 3 at m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27330
// ks = 3 at m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27347
// cm_stride = 11 for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27367
// m = 4, n = 8 with ks = 3 and a_offset = 83, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27383
// ks = 3 and a_offset = 83 with zero_index in [0, 3], for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27402
// Single run at m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27415
// Single run at m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27428
// Single run at m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27441 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27442
27443
27444 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 8, k = 4.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27456
// Single run at m = 4, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27469
// k = 4 for n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27486
// k = 4, n = 8 for m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27501
// k = 4, m = 4 for n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27516
// m = 4, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27530
// k in [1, 3], n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27549
// m = 4, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27563
// k in [5, 7], n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27582
// m = 4, n = 8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27596
// k = 8, 12, ..., 40 with n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27615
// m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27631
// cn_stride = 11 at m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27648
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27667
// m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27683
// cn_stride = 11 at m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27700
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27719
// m = 4, n = 8 with ks set to 3, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27734
// ks = 3 for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27754
// ks = 3 at m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27771
// ks = 3 at m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27788
// cm_stride = 11 for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27808
// m = 4, n = 8 with ks = 3 and a_offset = 83, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27824
// ks = 3 and a_offset = 83 with zero_index in [0, 3], for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27843
// Single run at m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27856
// Single run at m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27869
// Single run at m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
27882 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27883
27884
27885 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 8, k = 4 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
27897
// Single run at m = 4, n = 8, k = 4 with cn_stride set to 11 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
27910
// k = 4 for n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
27927
// k = 4, n = 8 for m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27942
// k = 4, m = 4 for n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27957
// m = 4, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
27971
// k in [1, 3], n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
27990
// m = 4, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28004
// k in [5, 7], n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28023
// m = 4, n = 8 for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28037
// k = 8, 12, ..., 40 with n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28056
// m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28072
// cn_stride = 11 at m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28089
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28108
// m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28124
// cn_stride = 11 at m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28141
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28160
// m = 4, n = 8 with ks set to 3, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28175
// ks = 3 for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28195
// ks = 3 at m = 4 for n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28212
// ks = 3 at m = 4 for n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28229
// cm_stride = 11 for k in {1, 6, 11, 16}, n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28249
// m = 4, n = 8 with ks = 3 and a_offset = 83, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28265
// ks = 3 and a_offset = 83 with zero_index in [0, 3], for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28284
// Single run at m = 4, n = 8, k = 4 with qmin set to 128 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
28297
// Single run at m = 4, n = 8, k = 4 with qmax set to 128 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
28310
// Single run at m = 4, n = 8, k = 4 with cm_stride set to 11 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
28323 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28324
28325
28326 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 4, n = 8, k = 4 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
28338
// Single run at m = 4, n = 8, k = 4 with cn_stride set to 11 (sr = 4).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
28351
// k = 4 for n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28368
// k = 4, n = 8 for m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28383
// k = 4, m = 4 for n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28398
// m = 4, n = 8 for k in [1, 3].
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28412
// k in [1, 3], n in [1, 8], m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28431
// m = 4, n = 8 for k in [5, 7].
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
28445
// For k = 5, 6, 7: runs every (n, m) with n in [1, 8] and m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28464
// Runs the tester for k = 8, 12, ..., 40 (step 4) with m = 4 and n = 8.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kk)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28478
// For k = 8, 12, ..., 40: runs every (n, m) with n in [1, 8] and m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28497
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28513
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 4 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28530
// For n in [9, 15] and k = 1, 6, 11, 16: runs every m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28549
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28565
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 4 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28582
// For n = 16 and 24 and k = 1, 6, 11, 16: runs every m in [1, 4],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28601
// Runs k = 1, 6, 11, 16 with m = 4, n = 8 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, small_kernel) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28616
// For k = 1, 6, 11, 16: runs every (n, m) with n in [1, 8] and m in [1, 4]
// using ks = 3, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28636
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28653
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 4 and ks = 3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28670
// For k = 1, 6, 11, 16: runs every (n, m) with n in [1, 8] and m in [1, 4]
// using cm_stride = 11, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mm).n(nn).k(kk)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28690
// Runs k = 1, 6, 11, 16 with m = 4, n = 8, ks = 3 and a_offset = 83.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, a_offset) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kk)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28706
// For k = 1, 6, 11, 16: runs each zero_index in [0, 3] with m = 4, n = 8,
// ks = 3 and a_offset = 83.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(kk)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28725
// Single run at m = 4, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
28738
// Single run at m = 4, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
28751
// Single run at m = 4, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
            xnn_init_f32_minmax_wasmsimd_params);
}
28764 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28765
28766
28767 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 5, n = 8, k = 4 (m and n equal to the mr/nr settings).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
28779
// Single run at m = 5, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
28792
// Runs every (n, m) with n in [1, 8] and m in [1, 5] at k = 4,
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 5; ++mm) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28809
// Runs the tester once per m in [1, 5] with n = 8 and k = 4 (iterations = 1).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mm = 1; mm <= 5; ++mm) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mm).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28824
// Runs the tester once per n in [1, 8] with m = 5 and k = 4 (iterations = 1).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28839
// Runs the tester for k = 1, 2, 3 with m = 5 and n = 8.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28853
// For k = 1, 2, 3: runs every (n, m) with n in [1, 8] and m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28872
// Runs the tester for k = 5, 6, 7 with m = 5 and n = 8.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28886
// For k = 5, 6, 7: runs every (n, m) with n in [1, 8] and m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28905
// Runs the tester for k = 8, 12, ..., 40 (step 4) with m = 5 and n = 8.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
28919
// For k = 8, 12, ..., 40: runs every (n, m) with n in [1, 8] and m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28938
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28954
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
28971
// For n in [9, 15] and k = 1, 6, 11, 16: runs every m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
28990
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29006
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29023
// For n = 16 and 24 and k = 1, 6, 11, 16: runs every m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29042
// Runs k = 1, 6, 11, 16 with m = 5, n = 8 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29057
// For k = 1, 6, 11, 16: runs every (n, m) with n in [1, 8] and m in [1, 5]
// using ks = 3, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29077
// For n in [9, 15]: runs k = 1, 6, 11, 16 with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29094
// For n = 16 and 24: runs k = 1, 6, 11, 16 with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29111
// For k = 1, 6, 11, 16: runs every (n, m) with n in [1, 8] and m in [1, 5]
// using cm_stride = 11, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29131
// Runs k = 1, 6, 11, 16 with m = 5, n = 8, ks = 3 and a_offset = 103.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, a_offset) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29147
// For k = 1, 6, 11, 16: runs each zero_index in [0, 4] with m = 5, n = 8,
// ks = 3 and a_offset = 103.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(kk)
          .ks(3)
          .a_offset(103)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29166
// Single run at m = 5, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29179
// Single run at m = 5, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29192
// Single run at m = 5, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29205 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29206
29207
29208 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m = 5, n = 8, k = 1 (m and n equal to the mr/nr settings).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29220
// Single run at m = 5, n = 8, k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29233
// Runs every (n, m) with n in [1, 8] and m in [1, 5] at k = 1,
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 5; ++mm) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29250
// Runs the tester once per m in [1, 5] with n = 8 and k = 1 (iterations = 1).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 5; ++mm) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mm).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29265
// Runs the tester once per n in [1, 8] with m = 5 and k = 1 (iterations = 1).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29280
// Runs the tester for every k in [2, 9] with m = 5 and n = 8.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29294
// For every k in [2, 9]: runs every (n, m) with n in [1, 8] and m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29313
// For n in [9, 15]: runs k = 1, 3, 5 with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29329
// For n in [9, 15]: runs k = 1, 3, 5 with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29346
// For n in [9, 15] and k = 1, 3, 5: runs every m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29365
// For n = 16 and 24: runs k = 1, 3, 5 with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29381
// For n = 16 and 24: runs k = 1, 3, 5 with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29398
// For n = 16 and 24 and k = 1, 3, 5: runs every m in [1, 5],
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29417
// Runs k = 1, 3, 5 with m = 5, n = 8 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29432
// For k = 1, 3, 5: runs every (n, m) with n in [1, 8] and m in [1, 5]
// using ks = 3, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29452
// For n in [9, 15]: runs k = 1, 3, 5 with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29469
// For n = 16 and 24: runs k = 1, 3, 5 with m = 5 and ks = 3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(nn).k(kk)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29486
// For k = 1, 3, 5: runs every (n, m) with n in [1, 8] and m in [1, 5]
// using cm_stride = 11, one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(mm).n(nn).k(kk)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29506
// Runs k = 1, 3, 5 with m = 5, n = 8, ks = 3 and a_offset = 29.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
29522
// For k = 1, 3, 5: runs each zero_index in [0, 4] with m = 5, n = 8,
// ks = 3 and a_offset = 29.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(kk)
          .ks(3)
          .a_offset(29)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29541
// Single run at m = 5, n = 8, k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
29554
// Full 5x8 tile, k = 1, with the tester's qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
29567
// Full 5x8 tile, k = 1, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
29580 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29581
29582
29583 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile (m = 5, n = 8) with k = 4.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29595
// Full 5x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29608
// k = 4; sweeps every (m, n) with 1 <= m <= 5 and 1 <= n <= 8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29625
// k = 4, n = 8; sweeps m over 1..5, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29640
// k = 4, m = 5; sweeps n over 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29655
// Full 5x8 tile; sweeps k over 1..3.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29669
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29688
// Full 5x8 tile; sweeps k over 5..7.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29702
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29721
// Full 5x8 tile; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29735
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29754
// m = 5; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29770
// m = 5, cn_stride = 11; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29787
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..5;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29806
// m = 5; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29822
// m = 5, cn_stride = 11; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29839
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..5;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29858
// Full 5x8 tile with ks = 3; sweeps k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29873
// ks = 3; sweeps k over {1, 6, 11, 16} and every (m, n) with 1 <= m <= 5,
// 1 <= n <= 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29893
// m = 5, ks = 3; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29910
// m = 5, ks = 3; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29927
// cm_stride = 11; sweeps k over {1, 6, 11, 16} and every (m, n) with
// 1 <= m <= 5, 1 <= n <= 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
29947
// Full 5x8 tile with ks = 3 and a_offset = 103; sweeps k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
29963
// Full 5x8 tile with ks = 3 and a_offset = 103; sweeps k over {1, 6, 11, 16}
// and zero_index over 0..4.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 5; ++z_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(k_val)
          .ks(3)
          .a_offset(103)
          .zero_index(z_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
29982
// Full 5x8 tile, k = 4, with the tester's qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
29995
// Full 5x8 tile, k = 4, with the tester's qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30008
// Full 5x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
}
30021 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30022
30023
30024 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile (m = 5, n = 8) with sr = 4 and k = 4.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
30036
// Full 5x8 tile, sr = 4, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
30049
// k = 4; sweeps every (m, n) with 1 <= m <= 5 and 1 <= n <= 8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30066
// k = 4, n = 8; sweeps m over 1..5, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30081
// k = 4, m = 5; sweeps n over 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30096
// Full 5x8 tile; sweeps k over 1..3.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30110
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30129
// Full 5x8 tile; sweeps k over 5..7.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30143
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30162
// Full 5x8 tile; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30176
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30195
// m = 5; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30211
// m = 5, cn_stride = 11; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30228
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..5;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30247
// m = 5; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30263
// m = 5, cn_stride = 11; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30280
// Sweeps n over {16, 24}, k over {1, 6, 11, 16} and m over 1..5;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30299
// Full 5x8 tile with ks = 3; sweeps k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30314
// ks = 3; sweeps k over {1, 6, 11, 16} and every (m, n) with 1 <= m <= 5,
// 1 <= n <= 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30334
// m = 5, ks = 3; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30351
// m = 5, ks = 3; sweeps n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30368
// cm_stride = 11; sweeps k over {1, 6, 11, 16} and every (m, n) with
// 1 <= m <= 5, 1 <= n <= 8; one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30388
// Full 5x8 tile with ks = 3 and a_offset = 103; sweeps k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30404
// Full 5x8 tile with ks = 3 and a_offset = 103; sweeps k over {1, 6, 11, 16}
// and zero_index over 0..4.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 5; ++z_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(8).k(k_val)
          .ks(3)
          .a_offset(103)
          .zero_index(z_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30423
// Full 5x8 tile, k = 4, with the tester's qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
30436
// Full 5x8 tile, k = 4, with the tester's qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
30449
// Full 5x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
}
30462 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30463
30464
30465 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile (m = 5, n = 8) with sr = 4 and k = 4.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
30477
// Full 5x8 tile, sr = 4, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
30490
// k = 4; sweeps every (m, n) with 1 <= m <= 5 and 1 <= n <= 8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30507
// k = 4, n = 8; sweeps m over 1..5, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30522
// k = 4, m = 5; sweeps n over 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30537
// Full 5x8 tile; sweeps k over 1..3.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30551
// Sweeps k over 1..3 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30570
// Full 5x8 tile; sweeps k over 5..7.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30584
// Sweeps k over 5..7 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30603
// Full 5x8 tile; sweeps k over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30617
// Sweeps k over 8, 12, ..., 40 and every (m, n) with 1 <= m <= 5, 1 <= n <= 8;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30636
// m = 5; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30652
// m = 5, cn_stride = 11; sweeps n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30669
// Sweeps n over 9..15, k over {1, 6, 11, 16} and m over 1..5;
// one iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30688
// 5x8s4 wasmsimd_x86 kernel: n = 16 and 24 with k = 1, 6, 11, 16 at m = 5.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30704
// 5x8s4 wasmsimd_x86 kernel: n = 16 and 24 with k = 1, 6, 11, 16 at m = 5,
// using cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30721
// 5x8s4 wasmsimd_x86 kernel: n = 16 and 24, k = 1, 6, 11, 16 and every
// m in [1, 5]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30740
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30755
// 5x8s4 wasmsimd_x86 kernel: ks(3) for k = 1, 6, 11, 16 over every
// m in [1, 5] and n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30775
// 5x8s4 wasmsimd_x86 kernel: n in [9, 15] with k = 1, 6, 11, 16 at m = 5,
// using ks(3).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30792
// 5x8s4 wasmsimd_x86 kernel: n = 16 and 24 with k = 1, 6, 11, 16 at m = 5,
// using ks(3).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30809
// 5x8s4 wasmsimd_x86 kernel: cm_stride(11) for k = 1, 6, 11, 16 over every
// m in [1, 5] and n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 5; ++m_dim) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
30829
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile with ks(3) and a_offset(103)
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, a_offset) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_dim)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30845
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile with ks(3) and a_offset(103)
// for k = 1, 6, 11, 16, trying every zero_index value in [0, 4].
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, zero) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(8).k(k_dim)
          .ks(3)
          .a_offset(103)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30864
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
30877
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
30890
// 5x8s4 wasmsimd_x86 kernel: full 5x8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
}
30903 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30904
30905
30906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30918
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
30931
// 6x8 wasmsimd_arm_loadsplat kernel: k = 1 over every m in [1, 6] and
// n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
30948
// 6x8 wasmsimd_arm_loadsplat kernel: k = 1, n = 8, every m in [1, 6];
// each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30963
// 6x8 wasmsimd_arm_loadsplat kernel: k = 1, m = 6, every n in [1, 8];
// each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30978
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile for every k in [2, 9].
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
30992
// 6x8 wasmsimd_arm_loadsplat kernel: every k in [2, 9] over every
// m in [1, 6] and n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31011
// 6x8 wasmsimd_arm_loadsplat kernel: n in [9, 15] with k = 1, 3, 5 at m = 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31027
// 6x8 wasmsimd_arm_loadsplat kernel: n in [9, 15] with k = 1, 3, 5 at m = 6,
// using cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31044
// 6x8 wasmsimd_arm_loadsplat kernel: n in [9, 15], k = 1, 3, 5 and every
// m in [1, 6]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31063
// 6x8 wasmsimd_arm_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5 at m = 6.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31079
// 6x8 wasmsimd_arm_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5 at m = 6,
// using cn_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31096
// 6x8 wasmsimd_arm_loadsplat kernel: n = 16 and 24, k = 1, 3, 5 and every
// m in [1, 6]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31115
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile with ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31130
// 6x8 wasmsimd_arm_loadsplat kernel: ks(3) for k = 1, 3, 5 over every
// m in [1, 6] and n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31150
// 6x8 wasmsimd_arm_loadsplat kernel: n in [9, 15] with k = 1, 3, 5 at m = 6,
// using ks(3).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31167
// 6x8 wasmsimd_arm_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5 at m = 6,
// using ks(3).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31184
// 6x8 wasmsimd_arm_loadsplat kernel: cm_stride(11) for k = 1, 3, 5 over every
// m in [1, 6] and n in [1, 8]; each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31204
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile with ks(3) and
// a_offset(37) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31220
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile with ks(3) and
// a_offset(37) for k = 1, 3, 5, trying every zero_index value in [0, 5].
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_dim)
          .ks(3)
          .a_offset(37)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31239
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile at k = 1 with qmin(128).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31252
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile at k = 1 with qmax(128).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31265
// 6x8 wasmsimd_arm_loadsplat kernel: full 6x8 tile at k = 1 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31278 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31279
31280
31281 #if XNN_ARCH_WASMRELAXEDSIMD
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31293
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile at k = 1 with
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31306
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: k = 1 over every n in [1, 8]
// (the m loop covers only m = 1); each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31323
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: k = 1, n = 8, with the m loop
// covering only m = 1; run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31338
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: k = 1, m = 1, every n in [1, 8];
// each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31353
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile for every
// k in [2, 9].
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31367
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: every k in [2, 9] over every
// n in [1, 8] (the m loop covers only m = 1); run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31386
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n in [9, 15] with k = 1, 3, 5
// at m = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31402
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n in [9, 15] with k = 1, 3, 5
// at m = 1, using cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31419
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n in [9, 15] and k = 1, 3, 5
// (the m loop covers only m = 1); each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31438
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5
// at m = 1.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31454
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5
// at m = 1, using cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31471
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n = 16 and 24 and k = 1, 3, 5
// (the m loop covers only m = 1); each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31490
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile with ks(3)
// for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31505
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: ks(3) for k = 1, 3, 5 over every
// n in [1, 8] (the m loop covers only m = 1); run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31525
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n in [9, 15] with k = 1, 3, 5
// at m = 1, using ks(3).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31542
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: n = 16 and 24 with k = 1, 3, 5
// at m = 1, using ks(3).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31559
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: cm_stride(11) for k = 1, 3, 5
// over every n in [1, 8] (the m loop covers only m = 1); run with
// iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31579
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile with ks(3) and
// a_offset(7) for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31595
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile with ks(3) and
// a_offset(7) for k = 1, 3, 5; the zero_index loop covers only the value 0.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(k_dim)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31614
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile at k = 1 with
// qmin(128).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31627
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile at k = 1 with
// qmax(128).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31640
// 1x8 wasmrelaxedsimd_fma_loadsplat kernel: full 1x8 tile at k = 1 with
// cm_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
31653 #endif // XNN_ARCH_WASMRELAXEDSIMD
31654
31655
31656 #if XNN_ARCH_WASMRELAXEDSIMD
// 1x8 wasmrelaxedsimd_fma_splat kernel: full 1x8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
31668
// 1x8 wasmrelaxedsimd_fma_splat kernel: full 1x8 tile at k = 4 with
// cn_stride(11).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
31681
// 1x8 wasmrelaxedsimd_fma_splat kernel: k = 4 over every n in [1, 8]
// (the m loop covers only m = 1); each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31698
// 1x8 wasmrelaxedsimd_fma_splat kernel: k = 4, n = 8, with the m loop
// covering only m = 1; run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31713
// 1x8 wasmrelaxedsimd_fma_splat kernel: k = 4, m = 1, every n in [1, 8];
// each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31728
// 1x8 wasmrelaxedsimd_fma_splat kernel: full 1x8 tile for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31742
// 1x8 wasmrelaxedsimd_fma_splat kernel: k = 1, 2, 3 over every n in [1, 8]
// (the m loop covers only m = 1); each case is run with iterations(1).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31761
// 1x8 wasmrelaxedsimd_fma_splat kernel: full 1x8 tile for k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31775
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// k in [5, 7], n in [1, 8] and m in [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31794
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1 and n = 8 for
// k = 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester{};
    tester
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31808
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// k = 8, 12, ..., 40, n in [1, 8] and m in [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31827
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1 for every
// n in [9, 15] combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31843
// Same sweep as n in [9, 15] x k = 1, 6, 11, 16 with m = 1, but with
// cn_stride set to 11 on the tester.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31860
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// n in [9, 15], k = 1, 6, 11, 16 and m in [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31879
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1 for n = 16 and 24
// combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31895
// Sweeps n = 16 and 24 against k = 1, 6, 11, 16 with m = 1 and cn_stride
// set to 11 on the tester.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31912
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// n = 16, 24, k = 1, 6, 11, 16 and m in [1, 1], one iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31931
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1, n = 8 and ks = 3
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester tester{};
    tester
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
31946
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with ks = 3 for every
// combination of k = 1, 6, 11, 16, n in [1, 8] and m in [1, 1], one
// iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
31966
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1 and ks = 3 for
// every n in [9, 15] combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
31983
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1 and ks = 3 for
// n = 16 and 24 combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32000
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with cm_stride set to 11 for
// every combination of k = 1, 6, 11, 16, n in [1, 8] and m in [1, 1], one
// iteration each.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32020
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1, n = 8, ks = 3
// and a_offset set to 23 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester tester{};
    tester
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32036
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel with m = 1, n = 8, ks = 3
// and a_offset = 23 for k = 1, 6, 11, 16, passing each zero_index in [0, 0].
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32055
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel once with m = 1, n = 8,
// k = 4 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester tester{};
  tester
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32068
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel once with m = 1, n = 8,
// k = 4 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester tester{};
  tester
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32081
// Runs the 1x8 wasmrelaxedsimd_fma_splat kernel once with m = 1, n = 8,
// k = 4 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester tester{};
  tester
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32094 #endif // XNN_ARCH_WASMRELAXEDSIMD
32095
32096
32097 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel once with m = 3, n = 8
// and k = 1.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32109
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel once with m = 3, n = 8,
// k = 1 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32122
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with k = 1 for every
// combination of n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32139
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with n = 8 and k = 1 for
// every m in [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32154
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 and k = 1 for
// every n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32169
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 and n = 8 for
// every k in [2, 9].
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32183
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel for every combination of
// k in [2, 9], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32202
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 for every
// n in [9, 15] combined with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32218
// Sweeps n in [9, 15] against k = 1, 3, 5 with m = 3 and cn_stride set to 11
// on the tester.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32235
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel for every combination of
// n in [9, 15], k = 1, 3, 5 and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32254
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 for n = 16
// and 24 combined with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32270
// Sweeps n = 16 and 24 against k = 1, 3, 5 with m = 3 and cn_stride set to
// 11 on the tester.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32287
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel for every combination of
// n = 16, 24, k = 1, 3, 5 and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32306
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3, n = 8 and
// ks = 3 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32321
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with ks = 3 for every
// combination of k = 1, 3, 5, n in [1, 8] and m in [1, 3], one iteration
// each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32341
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 and ks = 3
// for every n in [9, 15] combined with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32358
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3 and ks = 3
// for n = 16 and 24 combined with k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32375
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with cm_stride set to 11
// for every combination of k = 1, 3, 5, n in [1, 8] and m in [1, 3], one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32395
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3, n = 8,
// ks = 3 and a_offset set to 17 for k = 1, 3, 5.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .ks(3)
      .a_offset(17)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32411
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel with m = 3, n = 8,
// ks = 3 and a_offset = 17 for k = 1, 3, 5, passing each zero_index in
// [0, 2].
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32430
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel once with m = 3, n = 8,
// k = 1 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32443
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel once with m = 3, n = 8,
// k = 1 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32456
// Runs the 3x8 wasmrelaxedsimd_fma_loadsplat kernel once with m = 3, n = 8,
// k = 1 and cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
32469 #endif // XNN_ARCH_WASMRELAXEDSIMD
32470
32471
32472 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel once with m = 3, n = 8 and
// k = 4.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32484
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel once with m = 3, n = 8,
// k = 4 and cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester tester{};
  tester
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
32497
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with k = 4 for every
// combination of n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32514
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with n = 8 and k = 4 for
// every m in [1, 3], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32529
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and k = 4 for
// every n in [1, 8], one iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_val).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32544
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and n = 8 for
// every k in [1, 3].
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32558
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// k in [1, 3], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32577
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and n = 8 for
// every k in [5, 7].
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32591
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// k in [5, 7], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32610
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and n = 8 for
// k = 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32624
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// k = 8, 12, ..., 40, n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32643
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 for every
// n in [9, 15] combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32659
// Sweeps n in [9, 15] against k = 1, 6, 11, 16 with m = 3 and cn_stride set
// to 11 on the tester.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32676
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// n in [9, 15], k = 1, 6, 11, 16 and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32695
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 for n = 16 and 24
// combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32711
// Sweeps n = 16 and 24 against k = 1, 6, 11, 16 with m = 3 and cn_stride set
// to 11 on the tester.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32728
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel for every combination of
// n = 16, 24, k = 1, 6, 11, 16 and m in [1, 3], one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32747
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3, n = 8 and ks = 3
// for k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester tester{};
    tester
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
32762
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with ks = 3 for every
// combination of k = 1, 6, 11, 16, n in [1, 8] and m in [1, 3], one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32782
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and ks = 3 for
// every n in [9, 15] combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32799
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with m = 3 and ks = 3 for
// n = 16 and 24 combined with k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester tester{};
      tester
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32816
// Runs the 3x8 wasmrelaxedsimd_fma_splat kernel with cm_stride set to 11 for
// every combination of k = 1, 6, 11, 16, n in [1, 8] and m in [1, 3], one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
32836
// Full tile (m = 3, n = 8) for k in {1, 6, 11, 16} with ks = 3 and
// a_offset = 67.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val).ks(3).a_offset(67);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
32852
// Same setup as the a_offset case (ks = 3, a_offset = 67), additionally
// stepping zero_index through 0, 1 and 2 for each k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(k_val).ks(3).a_offset(67).zero_index(zero_idx);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32871
// Full tile (m = 3, n = 8) at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmin(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
32884
// Full tile (m = 3, n = 8) at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmax(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
32897
// Full tile (m = 3, n = 8) at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
32910 #endif // XNN_ARCH_WASMRELAXEDSIMD
32911
32912
32913 #if XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m = 3, n = 8) at k = 4; every other tester setting keeps its
// default.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
32925
// Full tile (m = 3, n = 8) at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cn_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
32938
// k = 4 for every (m, n) with n in [1, 8] and m in [1, 3]; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(4).iterations(1);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
32955
// k = 4 and n = 8 while m steps through 1, 2, 3; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(4).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
32970
// k = 4 and m = 3 while n steps through 1..8; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(n_val).k(4).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
32985
// Full tile (m = 3, n = 8) for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
32999
// k = 1, 2, 3 crossed with every (m, n), n in [1, 8] and m in [1, 3]; one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33018
// Full tile (m = 3, n = 8) for k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33032
// k = 5, 6, 7 crossed with every (m, n), n in [1, 8] and m in [1, 3]; one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33051
// Full tile (m = 3, n = 8) for k = 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33065
// k = 8, 12, ..., 40 crossed with every (m, n), n in [1, 8] and m in [1, 3];
// one iteration each.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33084
// n runs over 9..15 and k over {1, 6, 11, 16}; m stays at 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33100
// n runs over 9..15 and k over {1, 6, 11, 16}; m = 3 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33117
// n in 9..15, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33136
// n takes the values 16 and 24, k the values {1, 6, 11, 16}; m stays at 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33152
// n in {16, 24} and k in {1, 6, 11, 16}; m = 3 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33169
// n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33188
// Full tile (m = 3, n = 8) for k in {1, 6, 11, 16} with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val).ks(3);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33203
// Sweeps k over {1, 6, 11, 16} and every (m, n) with m in [1, 3] and
// n in [1, 8]; ks is fixed at 3 and iterations at 1.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33223
// n runs over 9..15 and k over {1, 6, 11, 16}; m stays at 3 and ks at 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33240
// n takes the values 16 and 24, k the values {1, 6, 11, 16}; m = 3, ks = 3.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33257
// Every (m, n) subtile for k in {1, 6, 11, 16} with cm_stride set to 11 and a
// single iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(11).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33277
// Full tile (m = 3, n = 8) for k in {1, 6, 11, 16} with ks = 3 and
// a_offset = 67.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val).ks(3).a_offset(67);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33293
// Same setup as the a_offset case (ks = 3, a_offset = 67), additionally
// stepping zero_index through 0, 1 and 2 for each k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(k_val).ks(3).a_offset(67).zero_index(zero_idx);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33312
// Full tile (m = 3, n = 8) at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmin(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
33325
// Full tile (m = 3, n = 8) at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).qmax(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
33338
// Full tile (m = 3, n = 8) at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,
      xnn_init_f32_minmax_wasmsimd_params);
}
33351 #endif // XNN_ARCH_WASMRELAXEDSIMD
33352
33353
33354 #if XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m = 3, n = 8) at k = 4 for the sr = 4 kernel; every other tester
// setting keeps its default.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
      xnn_init_f32_minmax_wasmsimd_params);
}
33366
// Full tile (m = 3, n = 8) at k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).cn_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
      xnn_init_f32_minmax_wasmsimd_params);
}
33379
// k = 4 for every (m, n) with n in [1, 8] and m in [1, 3]; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(m_val).n(n_val).k(4).iterations(1);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33396
// k = 4 and n = 8 while m steps through 1, 2, 3; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(m_val).n(8).k(4).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33411
// k = 4 and m = 3 while n steps through 1..8; one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(n_val).k(4).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33426
// Full tile (m = 3, n = 8) for k = 1, 2, 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33440
// k = 1, 2, 3 crossed with every (m, n), n in [1, 8] and m in [1, 3]; one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33459
// Full tile (m = 3, n = 8) for k = 5, 6, 7.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33473
// k = 5, 6, 7 crossed with every (m, n), n in [1, 8] and m in [1, 3]; one
// iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33492
// Full tile (m = 3, n = 8) for k = 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33506
// k = 8, 12, ..., 40 crossed with every (m, n), n in [1, 8] and m in [1, 3];
// one iteration each.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33525
// n runs over 9..15 and k over {1, 6, 11, 16}; m stays at 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33541
// n runs over 9..15 and k over {1, 6, 11, 16}; m = 3 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33558
// n in 9..15, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33577
// n takes the values 16 and 24, k the values {1, 6, 11, 16}; m stays at 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33593
// n in {16, 24} and k in {1, 6, 11, 16}; m = 3 and cn_stride = 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val).cn_stride(11);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33610
// n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per
// configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33629
// Full tile (m = 3, n = 8) for k in {1, 6, 11, 16} with ks set to 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val).ks(3);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33644
// Sweeps k over {1, 6, 11, 16} and every (m, n) with m in [1, 3] and
// n in [1, 8]; ks is fixed at 3 and iterations at 1.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33664
// n runs over 9..15 and k over {1, 6, 11, 16}; m stays at 3 and ks at 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33681
// n takes the values 16 and 24, k the values {1, 6, 11, 16}; m = 3, ks = 3.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33698
// Every (m, n) subtile for k in {1, 6, 11, 16} with cm_stride set to 11 and a
// single iteration per configuration.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(1).sr(4);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(11).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33718
// Full tile (m = 3, n = 8) for k in {1, 6, 11, 16} with ks = 3 and
// a_offset = 67.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(1).sr(4);
    tester.m(3).n(8).k(k_val).ks(3).a_offset(67);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33734
// Same setup as the a_offset case (ks = 3, a_offset = 67), additionally
// stepping zero_index through 0, 1 and 2 for each k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(1).sr(4);
      tester.m(3).n(8).k(k_val).ks(3).a_offset(67).zero_index(zero_idx);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33753
// Full tile (m = 3, n = 8) at k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).qmin(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
      xnn_init_f32_minmax_wasmsimd_params);
}
33766
// Full tile (m = 3, n = 8) at k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).qmax(128);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
      xnn_init_f32_minmax_wasmsimd_params);
}
33779
// Full tile (m = 3, n = 8) at k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(1).sr(4);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,
      xnn_init_f32_minmax_wasmsimd_params);
}
33792 #endif // XNN_ARCH_WASMRELAXEDSIMD
33793
33794
33795 #if XNN_ARCH_WASMRELAXEDSIMD
// Full tile (m = 4, n = 8) at k = 1; every other tester setting keeps its
// default.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
      xnn_init_f32_minmax_wasmsimd_params);
}
33807
// Full tile (m = 4, n = 8) at k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(1).cn_stride(11);
  tester.Test(
      xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
      xnn_init_f32_minmax_wasmsimd_params);
}
33820
// k = 1 for every (m, n) with n in [1, 8] and m in [1, 4]; one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33837
// k = 1 and n = 8 while m steps through 1..4; one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(1).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33852
// k = 1 and m = 4 while n steps through 1..8; one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(n_val).k(1).iterations(1);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33867
// Full tile (m = 4, n = 8) for k = 2..9.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_val);
    tester.Test(
        xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
        xnn_init_f32_minmax_wasmsimd_params);
  }
}
33881
// k = 2..9 crossed with every (m, n), n in [1, 8] and m in [1, 4]; one
// iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33900
// n runs over 9..15 and k over {1, 3, 5}; m stays at 4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,
          xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33916
// Runs every n in [9, 15] against k in {1, 3, 5} with m=4 and cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33933
// Runs every (n, k, m) combination with n in [9, 15], k in {1, 3, 5},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
33952
// Runs n in {16, 24} against k in {1, 3, 5} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33968
// Runs n in {16, 24} against k in {1, 3, 5} with m=4 and cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
33985
// Runs every (n, k, m) combination with n in {16, 24}, k in {1, 3, 5},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34004
// Runs k in {1, 3, 5} with m=4, n=8 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34019
// Runs every (k, n, m) combination with k in {1, 3, 5}, n in [1, 8],
// m in [1, 4] at ks=3, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34039
// Runs every n in [9, 15] against k in {1, 3, 5} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34056
// Runs n in {16, 24} against k in {1, 3, 5} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34073
// Runs every (k, n, m) combination with k in {1, 3, 5}, n in [1, 8],
// m in [1, 4] at cm_stride=11, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34093
// Runs k in {1, 3, 5} with m=4, n=8, ks=3 and a_offset=23.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34109
// Runs k in {1, 3, 5} against zero_index in [0, 3] with m=4, n=8, ks=3 and
// a_offset=23.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34128
// Single run at m=4, n=8, k=1 with qmin=128.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34141
// Single run at m=4, n=8, k=1 with qmax=128.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34154
// Single run at m=4, n=8, k=1 with cm_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
34167 #endif // XNN_ARCH_WASMRELAXEDSIMD
34168
34169
34170 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34182
// Single run at m=4, n=8, k=4 with cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34195
// Runs every (n, m) combination with n in [1, 8], m in [1, 4] at k=4,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34212
// Runs every m in [1, 4] with n=8 and k=4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34227
// Runs every n in [1, 8] with m=4 and k=4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34242
// Runs every k in [1, 3] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34256
// Runs every (k, n, m) combination with k in [1, 3], n in [1, 8], m in [1, 4],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34275
// Runs every k in [5, 7] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34289
// Runs every (k, n, m) combination with k in [5, 7], n in [1, 8], m in [1, 4],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34308
// Runs k = 8, 12, ..., 40 with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34322
// Runs every (k, n, m) combination with k = 8, 12, ..., 40, n in [1, 8],
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34341
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34357
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4 and
// cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34374
// Runs every (n, k, m) combination with n in [9, 15], k in {1, 6, 11, 16},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34393
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34409
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4 and cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34426
// Runs every (n, k, m) combination with n in {16, 24}, k in {1, 6, 11, 16},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34445
// Runs k in {1, 6, 11, 16} with m=4, n=8 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34460
// Runs every (k, n, m) combination with k in {1, 6, 11, 16}, n in [1, 8],
// m in [1, 4] at ks=3, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34480
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34497
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34514
// Runs every (k, n, m) combination with k in {1, 6, 11, 16}, n in [1, 8],
// m in [1, 4] at cm_stride=11, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34534
// Runs k in {1, 6, 11, 16} with m=4, n=8, ks=3 and a_offset=83.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34550
// Runs k in {1, 6, 11, 16} against zero_index in [0, 3] with m=4, n=8, ks=3
// and a_offset=83.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34569
// Single run at m=4, n=8, k=4 with qmin=128.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34582
// Single run at m=4, n=8, k=4 with qmax=128.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34595
// Single run at m=4, n=8, k=4 with cm_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34608 #endif // XNN_ARCH_WASMRELAXEDSIMD
34609
34610
34611 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=4, n=8, k=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34623
// Single run at m=4, n=8, k=4 with cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
34636
// Runs every (n, m) combination with n in [1, 8], m in [1, 4] at k=4,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34653
// Runs every m in [1, 4] with n=8 and k=4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34668
// Runs every n in [1, 8] with m=4 and k=4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34683
// Runs every k in [1, 3] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34697
// Runs every (k, n, m) combination with k in [1, 3], n in [1, 8], m in [1, 4],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34716
// Runs every k in [5, 7] with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34730
// Runs every (k, n, m) combination with k in [5, 7], n in [1, 8], m in [1, 4],
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34749
// Runs k = 8, 12, ..., 40 with m=4 and n=8.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34763
// Runs every (k, n, m) combination with k = 8, 12, ..., 40, n in [1, 8],
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34782
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34798
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4 and
// cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34815
// Runs every (n, k, m) combination with n in [9, 15], k in {1, 6, 11, 16},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34834
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34850
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4 and cn_stride=11.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34867
// Runs every (n, k, m) combination with n in {16, 24}, k in {1, 6, 11, 16},
// m in [1, 4], one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34886
// Runs k in {1, 6, 11, 16} with m=4, n=8 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, small_kernel) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34901
// Runs every (k, n, m) combination with k in {1, 6, 11, 16}, n in [1, 8],
// m in [1, 4] at ks=3, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34921
// Runs every n in [9, 15] against k in {1, 6, 11, 16} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34938
// Runs n in {16, 24} against k in {1, 6, 11, 16} with m=4 and ks=3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
34955
// Runs every (k, n, m) combination with k in {1, 6, 11, 16}, n in [1, 8],
// m in [1, 4] at cm_stride=11, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
34975
// Runs k in {1, 6, 11, 16} with m=4, n=8, ks=3 and a_offset=83.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, a_offset) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
34991
// For k in {1, 6, 11, 16}, runs the full tile with ks(3) and a_offset(83)
// once per zero_index value 0..3.
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, zero) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35010
// Runs the full m = 4, n = 8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35023
// Runs the full m = 4, n = 8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35036
// Runs the full m = 4, n = 8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8__WASMRELAXEDSIMD_SPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35049 #endif // XNN_ARCH_WASMRELAXEDSIMD
35050
35051
35052 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full m = 4, n = 8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35064
// Runs the full m = 4, n = 8 tile at k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35077
// Sweeps every (m, n) with m in 1..4 and n in 1..8 at k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35094
// Sweeps m = 1..4 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35109
// Sweeps n = 1..8 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35124
// Runs the full m = 4, n = 8 tile for k = 1..3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35138
// For k = 1..3, sweeps every (m, n) with m in 1..4 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35157
// Runs the full m = 4, n = 8 tile for k = 5..7.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35171
// For k = 5..7, sweeps every (m, n) with m in 1..4 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35190
// Runs the full m = 4, n = 8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35204
// For k = 8, 12, ..., 40, sweeps every (m, n) with m in 1..4 and n in 1..8,
// one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35223
// Sweeps n = 9..15 and k in {1, 6, 11, 16} with m = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35239
// Sweeps n = 9..15 and k in {1, 6, 11, 16} with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35256
// Sweeps n = 9..15, k in {1, 6, 11, 16} and m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35275
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m = 4.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35291
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m = 4 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35308
// Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m = 1..4, one iteration each.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35327
// Runs the full m = 4, n = 8 tile for k in {1, 6, 11, 16} with ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35342
// For k in {1, 6, 11, 16}, sweeps every (m, n) with m in 1..4 and n in 1..8,
// using ks(3) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35362
// Sweeps n = 9..15 and k in {1, 6, 11, 16} on a full m = 4 tile with ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35379
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} on a full m = 4 tile with ks(3).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(cols).k(depth).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35396
// For k in {1, 6, 11, 16}, sweeps every (m, n) with m in 1..4 and n in 1..8,
// using cm_stride(11) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(4).m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35416
// Runs the full m = 4, n = 8 tile for k in {1, 6, 11, 16} with ks(3) and a_offset(83).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth).ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35432
// For k in {1, 6, 11, 16}, runs the full tile with ks(3) and a_offset(83)
// once per zero_index value 0..3.
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(depth).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35451
// Runs the full m = 4, n = 8 tile at k = 4 with qmin(128).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(4).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35464
// Runs the full m = 4, n = 8 tile at k = 4 with qmax(128).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(4).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35477
// Runs the full m = 4, n = 8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(4).m(4).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, xnn_init_f32_minmax_wasmsimd_params);
}
35490 #endif // XNN_ARCH_WASMRELAXEDSIMD
35491
35492
35493 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full m = 5, n = 8 tile at k = 1.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(1);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
35505
// Runs the full m = 5, n = 8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(1).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
35518
// Sweeps every (m, n) with m in 1..5 and n in 1..8 at k = 1, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35535
// Sweeps m = 1..5 with n = 8 and k = 1, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(8).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35550
// Sweeps n = 1..8 with m = 5 and k = 1, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35565
// Runs the full m = 5, n = 8 tile for k = 2..9.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35579
// For k = 2..9, sweeps every (m, n) with m in 1..5 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35598
// Sweeps n = 9..15 and k in {1, 3, 5} with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35614
// Sweeps n = 9..15 and k in {1, 3, 5} with m = 5 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35631
// Sweeps n = 9..15, k in {1, 3, 5} and m = 1..5, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35650
// Sweeps n in {16, 24} and k in {1, 3, 5} with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35666
// Sweeps n in {16, 24} and k in {1, 3, 5} with m = 5 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35683
// Sweeps n in {16, 24}, k in {1, 3, 5} and m = 1..5, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35702
// Runs the full m = 5, n = 8 tile for k in {1, 3, 5} with ks(3).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth).ks(3);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35717
// For k in {1, 3, 5}, sweeps every (m, n) with m in 1..5 and n in 1..8,
// using ks(3) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35737
// Sweeps n = 9..15 and k in {1, 3, 5} on a full m = 5 tile with ks(3).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35754
// Sweeps n in {16, 24} and k in {1, 3, 5} on a full m = 5 tile with ks(3).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth).ks(3);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35771
// For k in {1, 3, 5}, sweeps every (m, n) with m in 1..5 and n in 1..8,
// using cm_stride(11) and a single iteration per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35791
// Runs the full m = 5, n = 8 tile for k in {1, 3, 5} with ks(3) and a_offset(29).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth).ks(3).a_offset(29);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35807
// For k in {1, 3, 5}, runs the full tile with ks(3) and a_offset(29)
// once per zero_index value 0..4.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth).ks(3).a_offset(29).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35826
// Runs the full m = 5, n = 8 tile at k = 1 with qmin(128).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(1).qmin(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
35839
// Runs the full m = 5, n = 8 tile at k = 1 with qmax(128).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(1).qmax(128);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
35852
// Runs the full m = 5, n = 8 tile at k = 1 with cm_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(1).cm_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
}
35865 #endif // XNN_ARCH_WASMRELAXEDSIMD
35866
35867
35868 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full m = 5, n = 8 tile at k = 4.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(4);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35880
// Runs the full m = 5, n = 8 tile at k = 4 with cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
}
35893
// Sweeps every (m, n) with m in 1..5 and n in 1..8 at k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
35910
// Sweeps m = 1..5 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35925
// Sweeps n = 1..8 with m = 5 and k = 4, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35940
// Runs the full m = 5, n = 8 tile for k = 1..3.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t depth = 1; depth < 4; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35954
// For k = 1..3, sweeps every (m, n) with m in 1..5 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
35973
// Runs the full m = 5, n = 8 tile for k = 5..7.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t depth = 5; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
35987
// For k = 5..7, sweeps every (m, n) with m in 1..5 and n in 1..8, one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36006
// Runs the full m = 5, n = 8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    auto tester = GemmMicrokernelTester();
    tester.mr(5).nr(8).kr(1).sr(1).m(5).n(8).k(depth);
    tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
  }
}
36020
// For k = 8, 12, ..., 40, sweeps every (m, n) with m in 1..5 and n in 1..8,
// one iteration each.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(5).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36039
// Sweeps n = 9..15 and k in {1, 6, 11, 16} with m = 5.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36055
// Sweeps n = 9..15 and k in {1, 6, 11, 16} with m = 5 and cn_stride(11).
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      auto tester = GemmMicrokernelTester();
      tester.mr(5).nr(8).kr(1).sr(1).m(5).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36072
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 5]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36091
// n in {16, 24} with m = 5 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36107
// n in {16, 24}, m = 5, k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36124
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 5]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36143
// m = 5, n = 8, ks = 3, with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36158
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 5]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36178
// ks = 3 with n in [9, 15], m = 5 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36195
// ks = 3 with n in {16, 24}, m = 5 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36212
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 5]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36232
// m = 5, n = 8, ks = 3, a_offset = 103, with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_size)
        .ks(3).a_offset(103)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36248
// Same setup as a_offset (ks = 3, a_offset = 103), additionally sweeping zero_index over 0..4.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(5).n(8).k(k_size)
          .ks(3).a_offset(103).zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36267
// m = 5, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36280
// m = 5, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36293
// m = 5, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36306 #endif // XNN_ARCH_WASMRELAXEDSIMD
36307
36308
36309 #if XNN_ARCH_WASMRELAXEDSIMD
// m = 6, n = 8, k = 1 with no additional options.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36321
// m = 6, n = 8, k = 1 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36334
// k = 1 with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36351
// k = 1, n = 8 with m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36366
// k = 1, m = 6 with n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36381
// m = 6, n = 8 with k in [2, 9].
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36395
// k in [2, 9] crossed with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36414
// n in [9, 15] with m = 6 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36430
// n in [9, 15], m = 6, k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36447
// n in [9, 15], k in {1, 3, 5}, m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36466
// n in {16, 24} with m = 6 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36482
// n in {16, 24}, m = 6, k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36499
// n in {16, 24}, k in {1, 3, 5}, m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36518
// m = 6, n = 8, ks = 3, with k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36533
// ks = 3 with k in {1, 3, 5}, n in [1, 8], m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36553
// ks = 3 with n in [9, 15], m = 6 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36570
// ks = 3 with n in {16, 24}, m = 6 and k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36587
// cm_stride = 11 with k in {1, 3, 5}, n in [1, 8], m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36607
// m = 6, n = 8, ks = 3, a_offset = 37, with k in {1, 3, 5}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3).a_offset(37)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36623
// Same setup as a_offset (ks = 3, a_offset = 37), additionally sweeping zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_size)
          .ks(3).a_offset(37).zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36642
// m = 6, n = 8, k = 1 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36655
// m = 6, n = 8, k = 1 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36668
// m = 6, n = 8, k = 1 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36681 #endif // XNN_ARCH_WASMRELAXEDSIMD
36682
36683
36684 #if XNN_ARCH_WASMRELAXEDSIMD
// m = 6, n = 8, k = 4 with no additional options.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36696
// m = 6, n = 8, k = 4 with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
36709
// k = 4 with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36726
// k = 4, n = 8 with m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36741
// k = 4, m = 6 with n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36756
// m = 6, n = 8 with k in [1, 3].
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36770
// k in [1, 3] crossed with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36789
// m = 6, n = 8 with k in [5, 7].
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36803
// k in [5, 7] crossed with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36822
// m = 6, n = 8 with k swept over 8, 12, ..., 40 (step 4).
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36836
// k in 8, 12, ..., 40 crossed with n in [1, 8] and m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36855
// n in [9, 15] with m = 6 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36871
// n in [9, 15], m = 6, k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36888
// n in [9, 15], k in {1, 6, 11, 16}, m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36907
// n in {16, 24} with m = 6 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36923
// n in {16, 24}, m = 6, k in {1, 6, 11, 16}, with cn_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
36940
// n in {16, 24}, k in {1, 6, 11, 16}, m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36959
// m = 6, n = 8, ks = 3, with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
36974
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
36994
// ks = 3 with n in [9, 15], m = 6 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37011
// ks = 3 with n in {16, 24}, m = 6 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37028
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8], m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                  xnn_init_f32_minmax_wasmsimd_params);
      }
    }
  }
}
37048
// m = 6, n = 8, ks = 3, a_offset = 127, with k in {1, 6, 11, 16}.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .ks(3).a_offset(127)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
              xnn_init_f32_minmax_wasmsimd_params);
  }
}
37064
// Same setup as a_offset (ks = 3, a_offset = 127), additionally sweeping zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_size)
          .ks(3).a_offset(127).zero_index(zero_idx)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
                xnn_init_f32_minmax_wasmsimd_params);
    }
  }
}
37083
// m = 6, n = 8, k = 4 with qmin set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37096
// m = 6, n = 8, k = 4 with qmax set to 128.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37109
// m = 6, n = 8, k = 4 with cm_stride set to 11.
TEST(F32_IGEMM_MINMAX_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,
            xnn_init_f32_minmax_wasmsimd_params);
}
37122 #endif // XNN_ARCH_WASMRELAXEDSIMD
37123
37124
37125 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m=1, n=4, k=1.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
37137
// Single run with m=1, n=4, k=1 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
37150
// Sweeps n over [1, 4] and m over [1, 1] with k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37167
// Sweeps m over [1, 1] with n=4 and k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
37182
// Sweeps n over [1, 4] with m=1 and k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
37197
// Sweeps k over [2, 9] with m=1 and n=4.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
37211
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 1], one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37230
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=1.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37246
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37263
// Sweeps n over [5, 7], k over {1, 3, 5} and m over [1, 1], one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37282
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=1.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37298
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=1 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37315
// Sweeps n over {8, 12}, k over {1, 3, 5} and m over [1, 1], one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37334
// Sweeps k over {1, 3, 5} with m=1, n=4 and ks=3.
TEST(F32_IGEMM_MINMAX_1X4__WASM, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
37349
// Sweeps k over {1, 3, 5}, n over [1, 4] and m over [1, 1] with ks=3,
// one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37369
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=1 and ks=3.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37386
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=1 and ks=3.
TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37403
// Sweeps k over {1, 3, 5}, n over [1, 4] and m over [1, 1] with cm_stride
// set to 7, one iteration per case.
TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37423
// Sweeps k over {1, 3, 5} with m=1, n=4, ks=3 and a_offset set to 7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
  }
}
37439
// Sweeps k over {1, 3, 5} and zero_index over [0, 1) with m=1, n=4, ks=3
// and a_offset=7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(1).n(4).k(k)
          .ks(3)
          .a_offset(7)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37458
// Single run with m=1, n=4, k=1 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_1X4__WASM, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
37471
// Single run with m=1, n=4, k=1 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_1X4__WASM, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
37484
// Single run with m=1, n=4, k=1 and cm_stride set to 7.
TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
}
37497 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37498
37499
// Single run with m=2, n=4, k=1.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37511
// Single run with m=2, n=4, k=1 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37524
// Sweeps n over [1, 4] and m over [1, 2] with k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37541
// Sweeps m over [1, 2] with n=4 and k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37556
// Sweeps n over [1, 4] with m=2 and k=1, one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37571
// Sweeps k over [2, 9] with m=2 and n=4.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37585
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 2], one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37604
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=2.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37620
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=2 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37637
// Sweeps n over [5, 7], k over {1, 3, 5} and m over [1, 2], one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37656
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=2.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37672
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=2 and cn_stride set to 7.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37689
// Sweeps n over {8, 12}, k over {1, 3, 5} and m over [1, 2], one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37708
// Sweeps k over {1, 3, 5} with m=2, n=4 and ks=3.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37723
// Sweeps k over {1, 3, 5}, n over [1, 4] and m over [1, 2] with ks=3,
// one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37743
// Sweeps n over [5, 7] and k over {1, 3, 5} with m=2 and ks=3.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37760
// Sweeps n over {8, 12} and k over {1, 3, 5} with m=2 and ks=3.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37777
// Sweeps k over {1, 3, 5}, n over [1, 4] and m over [1, 2] with cm_stride
// set to 7, one iteration per case.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37797
// Sweeps k over {1, 3, 5} with m=2, n=4, ks=3 and a_offset set to 13.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(13)
        .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
  }
}
37813
// Sweeps k over {1, 3, 5} and zero_index over [0, 2) with m=2, n=4, ks=3
// and a_offset=13.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k)
          .ks(3)
          .a_offset(13)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37832
// Single run with m=2, n=4, k=1 and qmin set to 128.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37845
// Single run with m=2, n=4, k=1 and qmax set to 128.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37858
// Single run with m=2, n=4, k=1 and cm_stride set to 7.
TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
}
37871
37872
37873 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Guarded by TEST_REQUIRES_ARM_NEON. Single run with m=4, n=8, k=2.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
37886
// Guarded by TEST_REQUIRES_ARM_NEON. Single run with m=4, n=8, k=2 and
// cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
}
37900
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [1, 8] and m over [1, 4]
// with k=2, one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(2)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
37918
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps m over [1, 4] with n=8 and k=2,
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(8).k(2)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
37934
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [1, 8] with m=4 and k=2,
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(2)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
37950
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over [1, 2) (k=1 only) with
// m=4 and n=8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
37965
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over [1, 2) (k=1 only),
// n over [1, 8] and m over [1, 4], one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 2; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
37985
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over [3, 4) (k=3 only) with
// m=4 and n=8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
38000
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over [3, 4) (k=3 only),
// n over [1, 8] and m over [1, 4], one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 3; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38020
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over 4, 6, ..., 20 with m=4
// and n=8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
38035
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over 4, 6, ..., 20, n over
// [1, 8] and m over [1, 4], one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38055
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [9, 15] and k over
// {1, 4, 7, 10} with m=4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38072
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [9, 15] and k over
// {1, 4, 7, 10} with m=4 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38090
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [9, 15], k over
// {1, 4, 7, 10} and m over [1, 4], one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38110
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over {16, 24} and k over
// {1, 4, 7, 10} with m=4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38127
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over {16, 24} and k over
// {1, 4, 7, 10} with m=4 and cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38145
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over {16, 24}, k over
// {1, 4, 7, 10} and m over [1, 4], one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38165
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over {1, 4, 7, 10} with m=4,
// n=8 and ks=3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
38181
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over {1, 4, 7, 10}, n over
// [1, 8] and m over [1, 4] with ks=3, one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38202
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over [9, 15] and k over
// {1, 4, 7, 10} with m=4 and ks=3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38220
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps n over {16, 24} and k over
// {1, 4, 7, 10} with m=4 and ks=3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38238
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over {1, 4, 7, 10}, n over
// [1, 8] and m over [1, 4] with cm_stride set to 11, one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38259
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over {1, 4, 7, 10} with m=4,
// n=8, ks=3 and a_offset set to 43.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
  }
}
38276
// Guarded by TEST_REQUIRES_ARM_NEON. Sweeps k over {1, 4, 7, 10} and
// zero_index over [0, 4) with m=4, n=8, ks=3 and a_offset=43.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k)
          .ks(3)
          .a_offset(43)
          .zero_index(mz)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
    }
  }
}
38296
// Cortex-A7 generator: full 4x8 tile at k = 2 with qmin set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .qmin(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7,
            xnn_init_f32_minmax_scalar_params);
}
38310
// Cortex-A7 generator: full 4x8 tile at k = 2 with qmax set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .qmax(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7,
            xnn_init_f32_minmax_scalar_params);
}
38324
// Cortex-A7 generator: full 4x8 tile at k = 2 with cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cm_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7,
            xnn_init_f32_minmax_scalar_params);
}
38338 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
38339
38340
38341 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Cortex-A55 generator: full 4x8 tile at k = 4, no extra tester options.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38354
// Cortex-A55 generator: full 4x8 tile at k = 4 with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38368
// Cortex-A55 generator: k = 4 over every sub-tile (n in 1..8, m in 1..4),
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38386
// Cortex-A55 generator: k = 4, n = 8, with m swept over 1..4; one iteration
// per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38402
// Cortex-A55 generator: k = 4, m = 4, with n swept over 1..8; one iteration
// per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38418
// Cortex-A55 generator: full 4x8 tile at k = 8, no extra tester options.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38431
// Cortex-A55 generator: k = 8 over every sub-tile (n in 1..8, m in 1..4),
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38449
// Cortex-A55 generator: full 4x8 tile with k swept over 1..7.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38464
// Cortex-A55 generator: k swept over 1..7 for every sub-tile (n in 1..8,
// m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38484
// Cortex-A55 generator: full 4x8 tile with k swept over 9..15.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38499
// Cortex-A55 generator: k swept over 9..15 for every sub-tile (n in 1..8,
// m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38519
// Cortex-A55 generator: full 4x8 tile with k swept over 12..40 in steps of 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38534
// Cortex-A55 generator: k swept over 12..40 in steps of 4 for every sub-tile
// (n in 1..8, m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38554
// Cortex-A55 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16}.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38571
// Cortex-A55 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38589
// Cortex-A55 generator: n swept over 9..15, k in {1, 6, 11, 16}, m in 1..4;
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38609
// Cortex-A55 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38626
// Cortex-A55 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with
// cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38644
// Cortex-A55 generator: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..4;
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38664
// Cortex-A55 generator: full 4x8 tile, k in {1, 6, 11, 16}, with ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38680
// Cortex-A55 generator: k in {1, 6, 11, 16} with ks = 3 for every sub-tile
// (n in 1..8, m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38701
// Cortex-A55 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16},
// with ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38719
// Cortex-A55 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with
// ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38737
// Cortex-A55 generator: k in {1, 6, 11, 16} for every sub-tile (n in 1..8,
// m in 1..4) with cm_stride set to 11, one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38758
// Cortex-A55 generator: full 4x8 tile, k in {1, 6, 11, 16}, with ks = 3 and
// a_offset = 83.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
              xnn_init_f32_minmax_scalar_params);
  }
}
38775
// Cortex-A55 generator: full 4x8 tile, k in {1, 6, 11, 16}, ks = 3 and
// a_offset = 83, repeated for each zero_index in 0..3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(z_idx)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38795
// Cortex-A55 generator: full 4x8 tile at k = 4 with qmin set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38809
// Cortex-A55 generator: full 4x8 tile at k = 4 with qmax set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38823
// Cortex-A55 generator: full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55,
            xnn_init_f32_minmax_scalar_params);
}
38837 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
38838
38839
38840 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Cortex-A75 generator: full 4x8 tile at k = 4, no extra tester options.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
38853
// Cortex-A75 generator: full 4x8 tile at k = 4 with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
38867
// Cortex-A75 generator: k = 4 over every sub-tile (n in 1..8, m in 1..4),
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38885
// Cortex-A75 generator: k = 4, n = 8, with m swept over 1..4; one iteration
// per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
38901
// Cortex-A75 generator: k = 4, m = 4, with n swept over 1..8; one iteration
// per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
38917
// Cortex-A75 generator: full 4x8 tile at k = 8, no extra tester options.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
38930
// Cortex-A75 generator: k = 8 over every sub-tile (n in 1..8, m in 1..4),
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
38948
// Cortex-A75 generator: full 4x8 tile with k swept over 1..7.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
38963
// Cortex-A75 generator: k swept over 1..7 for every sub-tile (n in 1..8,
// m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
38983
// Cortex-A75 generator: full 4x8 tile with k swept over 9..15.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
38998
// Cortex-A75 generator: k swept over 9..15 for every sub-tile (n in 1..8,
// m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39018
// Cortex-A75 generator: full 4x8 tile with k swept over 12..40 in steps of 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
39033
// Cortex-A75 generator: k swept over 12..40 in steps of 4 for every sub-tile
// (n in 1..8, m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 12; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39053
// Cortex-A75 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16}.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39070
// Cortex-A75 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16},
// with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39088
// Cortex-A75 generator: n swept over 9..15, k in {1, 6, 11, 16}, m in 1..4;
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39108
// Cortex-A75 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39125
// Cortex-A75 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with
// cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39143
// Cortex-A75 generator: n in {16, 24}, k in {1, 6, 11, 16}, m in 1..4;
// one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39163
// Cortex-A75 generator: full 4x8 tile, k in {1, 6, 11, 16}, with ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
39179
// Cortex-A75 generator: k in {1, 6, 11, 16} with ks = 3 for every sub-tile
// (n in 1..8, m in 1..4), one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39200
// Cortex-A75 generator: m = 4, n swept over 9..15, k in {1, 6, 11, 16},
// with ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39218
// Cortex-A75 generator: m = 4, n in {16, 24}, k in {1, 6, 11, 16}, with
// ks = 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39236
// Cortex-A75 generator: k in {1, 6, 11, 16} for every sub-tile (n in 1..8,
// m in 1..4) with cm_stride set to 11, one iteration per case.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                  xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39257
// Cortex-A75 generator: full 4x8 tile, k in {1, 6, 11, 16}, with ks = 3 and
// a_offset = 83.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
              xnn_init_f32_minmax_scalar_params);
  }
}
39274
// Cortex-A75 generator: full 4x8 tile, k in {1, 6, 11, 16}, ks = 3 and
// a_offset = 83, repeated for each zero_index in 0..3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(z_idx)
          .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
                xnn_init_f32_minmax_scalar_params);
    }
  }
}
39294
// Cortex-A75 generator: full 4x8 tile at k = 4 with qmin set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmin(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
39308
// Cortex-A75 generator: full 4x8 tile at k = 4 with qmax set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .qmax(128)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
39322
// Cortex-A75 generator: full 4x8 tile at k = 4 with cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
}
39336 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
39337
39338
39339 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// NEON LD64 generator: full 4x8 tile at k = 2, no extra tester options.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64,
            xnn_init_f32_minmax_scalar_params);
}
39352
// NEON LD64 generator: full 4x8 tile at k = 2 with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(2)
      .cn_stride(11)
      .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64,
            xnn_init_f32_minmax_scalar_params);
}
39366
// Sweeps n in [1, 8] (outer) and m in [1, 4] (inner) at k = 2, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(2);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39384
// Sweeps m in [1, 4] at n = 8, k = 2, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mm).n(8).k(2);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39400
// Sweeps n in [1, 8] at m = 4, k = 2, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nn).k(2);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39416
// Sweeps k over [1, 2) (i.e. k = 1 only) at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 2; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39431
// Sweeps k over [1, 2), n in [1, 8] and m in [1, 4] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 2; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39451
// Sweeps k over [3, 4) (i.e. k = 3 only) at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 3; kk < 4; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39466
// Sweeps k over [3, 4), n in [1, 8] and m in [1, 4] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 3; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39486
// Sweeps k = 4, 6, ..., 20 at m = 4, n = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39501
// Sweeps k = 4, 6, ..., 20, n in [1, 8] and m in [1, 4] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 4; kk <= 20; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39521
// Sweeps n in [9, 16) (outer) and k = 1, 4, 7, 10 (inner) at m = 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39538
// Sweeps n in [9, 16) (outer) and k = 1, 4, 7, 10 (inner) at m = 4, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39556
// Sweeps n in [9, 16), k = 1, 4, 7, 10 and m in [1, 4] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39576
// Sweeps n = 16, 24 (outer) and k = 1, 4, 7, 10 (inner) at m = 4.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39593
// Sweeps n = 16, 24 (outer) and k = 1, 4, 7, 10 (inner) at m = 4, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39611
// Sweeps n = 16, 24, k = 1, 4, 7, 10 and m in [1, 4] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39631
// Sweeps k = 1, 4, 7, 10 at m = 4, n = 8, with ks set to 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.ks(3);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39647
// Sweeps k = 1, 4, 7, 10, n in [1, 8] and m in [1, 4] (outer to inner), with ks = 3 and iterations = 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39668
// Sweeps n in [9, 16) (outer) and k = 1, 4, 7, 10 (inner) at m = 4, with ks set to 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39686
// Sweeps n = 16, 24 (outer) and k = 1, 4, 7, 10 (inner) at m = 4, with ks set to 3.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 10; kk += 3) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39704
// Sweeps k = 1, 4, 7, 10, n in [1, 8] and m in [1, 4] (outer to inner), with cm_stride = 11 and iterations = 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39725
// Sweeps k = 1, 4, 7, 10 at m = 4, n = 8, with ks = 3 and a_offset = 43.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
  }
}
39742
// Sweeps k = 1, 4, 7, 10 (outer) and zero_index in [0, 4) (inner) at m = 4, n = 8,
// with ks = 3 and a_offset = 43.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 10; kk += 3) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(kk);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39762
// Single configuration: m = 4, n = 8, k = 2, with qmin set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmin(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
39776
// Single configuration: m = 4, n = 8, k = 2, with qmax set to 128.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.qmax(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
39790
// Single configuration: m = 4, n = 8, k = 2, with cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(2);
  tester.cm_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
}
39804 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
39805
39806
39807 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// Single configuration: m = 6, n = 8, k = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
39820
// Single configuration: m = 6, n = 8, k = 8, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
39834
// Sweeps n in [1, 8] (outer) and m in [1, 6] (inner) at k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 6; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39852
// Sweeps m in [1, 6] at n = 8, k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm <= 6; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(mm).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
39868
// Sweeps n in [1, 8] at m = 6, k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(nn).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
39884
// Single configuration: m = 6, n = 8, k = 16.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(16);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
39897
// Sweeps n in [1, 8] (outer) and m in [1, 6] (inner) at k = 16, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 6; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(16);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
39915
// Sweeps k in [1, 16) at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 16; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
39930
// Sweeps k in [1, 16), n in [1, 8] and m in [1, 6] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39950
// Sweeps k in [17, 32) at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 17; kk < 32; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
39965
// Sweeps k in [17, 32), n in [1, 8] and m in [1, 6] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 17; kk < 32; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
39985
// Sweeps k = 24, 32, ..., 80 at m = 6, n = 8.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 24; kk <= 80; kk += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
40000
// Sweeps k = 24, 32, ..., 80, n in [1, 8] and m in [1, 6] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 24; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40020
// Sweeps n in [9, 16) (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40037
// Sweeps n in [9, 16) (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40055
// Sweeps n in [9, 16), k = 1, 10, 19, 28, 37 and m in [1, 6] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40075
// Sweeps n = 16, 24 (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40092
// Sweeps n = 16, 24 (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40110
// Sweeps n = 16, 24, k = 1, 10, 19, 28, 37 and m in [1, 6] (outer to inner), with iterations set to 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40130
// Sweeps k = 1, 10, 19, 28, 37 at m = 6, n = 8, with ks set to 3.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.ks(3);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
40146
// Sweeps k = 1, 10, 19, 28, 37, n in [1, 8] and m in [1, 6] (outer to inner), with ks = 3 and iterations = 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40167
// Sweeps n in [9, 16) (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6, with ks set to 3.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40185
// Sweeps n = 16, 24 (outer) and k = 1, 10, 19, 28, 37 (inner) at m = 6, with ks set to 3.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nn).k(kk);
      tester.ks(3);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40203
// Sweeps k = 1, 10, 19, 28, 37, n in [1, 8] and m in [1, 6] (outer to inner),
// with cm_stride = 11 and iterations = 1.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40224
// Sweeps k = 1, 10, 19, 28, 37 at m = 6, n = 8, with ks = 3 and a_offset = 251.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kk);
    tester.ks(3).a_offset(251);
    tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
40241
// Sweeps k = 1, 10, 19, 28, 37 (outer) and zero_index in [0, 6) (inner) at m = 6, n = 8,
// with ks = 3 and a_offset = 251.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(kk);
      tester.ks(3).a_offset(251).zero_index(zi);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40261
// Single configuration: m = 6, n = 8, k = 8, with qmin set to 128.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40275
// Single configuration: m = 6, n = 8, k = 8, with qmax set to 128.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40289
// Single configuration: m = 6, n = 8, k = 8, with cm_stride set to 11.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40303
// For each mr value in [1, 6] (outer), sweeps m in [1, mr] (inner) at n = 8, k = 8,
// with iterations set to 1. Unlike the other tests in this suite, mr itself varies.
TEST(GENERATE_F32_IGEMM_UPTO6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m_upto_mr) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mr_cap = 1; mr_cap <= 6; ++mr_cap) {
    for (uint32_t mm = 1; mm <= mr_cap; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(mr_cap).nr(8).kr(1).sr(1);
      tester.m(mm).n(8).k(8);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40321 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
40322
40323
40324 #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
// Single configuration: m = 4, n = 8, k = 8.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40337
// Single configuration: m = 4, n = 8, k = 8, with cn_stride set to 11.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40351
// Sweeps n in [1, 8] (outer) and m in [1, 4] (inner) at k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40369
// Sweeps m in [1, 4] at n = 8, k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mm).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
40385
// Sweeps n in [1, 8] at m = 4, k = 8, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nn).k(8);
    tester.iterations(1);
    tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
  }
}
40401
// Single configuration: m = 4, n = 8, k = 16.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(16);
  tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
}
40414
// Sweeps n in [1, 8] (outer) and m in [1, 4] (inner) at k = 16, with iterations set to 1.
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(16);
      tester.iterations(1);
      tester.Test(xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
    }
  }
}
40432
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
  // Runs m = 4, n = 8 for every k in [1, 15].
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
40447
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
  // For every k in [1, 15], exercises each (m, n) combination with
  // m in [1, 4] and n in [1, 8], one iteration per combination.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40467
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
  // Runs m = 4, n = 8 for every k in [17, 31].
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
40482
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
  // For every k in [17, 31], exercises each (m, n) combination with
  // m in [1, 4] and n in [1, 8], one iteration per combination.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40502
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
  // Runs m = 4, n = 8 for k = 24, 32, ..., 80 (multiples of 8).
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
40517
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
  // For k = 24, 32, ..., 80, exercises each (m, n) combination with
  // m in [1, 4] and n in [1, 8], one iteration per combination.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 24; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40537
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
  // Runs m = 4 for every n in [9, 15], with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40554
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
  // Same sweep as n in [9, 15] and k = 1, 10, 19, 28, 37 at m = 4,
  // but with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40572
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
  // For n in [9, 15] and k = 1, 10, 19, 28, 37, runs every m in [1, 4]
  // with a single iteration per combination.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40592
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
  // Runs m = 4 for n = 16 and n = 24, with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40609
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
  // Runs m = 4 for n = 16 and n = 24, with k = 1, 10, 19, 28, 37 and
  // cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(11);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40627
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
  // For n = 16 and n = 24 and k = 1, 10, 19, 28, 37, runs every m in
  // [1, 4] with a single iteration per combination.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40647
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
  // Runs m = 4, n = 8 with ks set to 3 for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
40663
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
  // With ks set to 3 and k = 1, 10, 19, 28, 37, exercises each (m, n)
  // combination with m in [1, 4] and n in [1, 8], one iteration each.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40684
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
  // Runs m = 4 with ks set to 3 for every n in [9, 15] and
  // k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40702
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
  // Runs m = 4 with ks set to 3 for n = 16 and n = 24 and
  // k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40720
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
  // With cm_stride set to 11 and k = 1, 10, 19, 28, 37, exercises each
  // (m, n) combination with m in [1, 4] and n in [1, 8], one iteration each.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}
40741
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
  // Runs m = 4, n = 8 with ks set to 3 and a_offset set to 163 for
  // k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_dim);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        xnn_init_f32_minmax_scalar_params);
  }
}
40758
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
  // Runs m = 4, n = 8 with ks set to 3 and a_offset set to 163, trying
  // every zero_index in [0, 3] for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(k_dim);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
          xnn_init_f32_minmax_scalar_params);
    }
  }
}
40778
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
  // Single configuration: m = 4, n = 8, k = 8 with qmin set to 128.
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
40792
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
  // Single configuration: m = 4, n = 8, k = 8 with qmax set to 128.
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
40806
TEST(GENERATE_F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
  // Single configuration: m = 4, n = 8, k = 8 with cm_stride set to 11.
  TEST_REQUIRES_ARM_NEON_FMA;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
      xnn_init_f32_minmax_scalar_params);
}
40820 #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
40821