1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-igemm.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  // Single run at m=3, n=8 (equal to mr/nr) with k=1.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
}
39
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, strided_cn) {
  // m=3, n=8, k=1 with cn_stride explicitly set to 11 (greater than nr=8).
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
}
52
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  // Sweeps n in [1, 8] and m in [1, 3] at k=1; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
69
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  // Sweeps m in [1, 3] with n=8 and k=1; one iteration per shape.
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
84
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  // Sweeps n in [1, 8] with m=3 and k=1; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
99
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  // Sweeps k in [2, 9] at m=3, n=8.
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
113
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  // Sweeps k in [2, 9], n in [1, 8] and m in [1, 3]; one iteration per shape.
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
132
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  // Sweeps n in [9, 15] and k in {1, 3, 5} at m=3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
148
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  // Sweeps n in [9, 15] and k in {1, 3, 5} at m=3 with cn_stride set to 11.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
165
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  // Sweeps n in [9, 15], k in {1, 3, 5} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
184
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8) {
  // Sweeps n in {16, 24} (multiples of nr=8) and k in {1, 3, 5} at m=3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
200
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  // Sweeps n in {16, 24} and k in {1, 3, 5} at m=3 with cn_stride set to 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
217
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  // Sweeps n in {16, 24}, k in {1, 3, 5} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
236
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, small_kernel) {
  // Sweeps k in {1, 3, 5} at m=3, n=8 with ks set to 3.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
251
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  // Sweeps k in {1, 3, 5}, n in [1, 8] and m in [1, 3] with ks=3; one iteration per shape.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
271
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  // Sweeps n in [9, 15] and k in {1, 3, 5} at m=3 with ks set to 3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
288
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  // Sweeps n in {16, 24} and k in {1, 3, 5} at m=3 with ks set to 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
305
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  // Sweeps k in {1, 3, 5}, n in [1, 8] and m in [1, 3] with cm_stride=11; one iteration per shape.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
325
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, a_offset) {
  // Sweeps k in {1, 3, 5} at m=3, n=8 with ks=3 and a_offset set to 17.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
341
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, zero) {
  // Sweeps k in {1, 3, 5} and zero_index mz in [0, 2] at m=3, n=8 with ks=3, a_offset=17.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
360
TEST(F32_IGEMM_3X8__WASMSIMD_LOADSPLAT, strided_cm) {
  // m=3, n=8, k=1 with cm_stride explicitly set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_loadsplat);
}
373 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
374
375
376 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_eq_4) {
  // Single run at m=3, n=8 (equal to mr/nr) with k=4.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
}
388
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, strided_cn) {
  // m=3, n=8, k=4 with cn_stride explicitly set to 11 (greater than nr=8).
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
}
401
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // Sweeps n in [1, 8] and m in [1, 3] at k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
418
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // Sweeps m in [1, 3] with n=8 and k=4; one iteration per shape.
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
433
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // Sweeps n in [1, 8] with m=3 and k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
448
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_lt_4) {
  // Sweeps k in [1, 3] at m=3, n=8.
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
462
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // Sweeps k in [1, 3], n in [1, 8] and m in [1, 3]; one iteration per shape.
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
481
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_gt_4) {
  // Sweeps k in [5, 7] at m=3, n=8.
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
495
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // Sweeps k in [5, 7], n in [1, 8] and m in [1, 3]; one iteration per shape.
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
514
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_div_4) {
  // Sweeps k over multiples of 4 from 8 to 40 at m=3, n=8.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
528
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // Sweeps k over multiples of 4 from 8 to 40, n in [1, 8], m in [1, 3]; one iteration per shape.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
547
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_gt_8) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
563
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
580
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // Sweeps n in [9, 15], k in {1, 6, 11, 16} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
599
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_div_8) {
  // Sweeps n in {16, 24} (multiples of nr=8) and k in {1, 6, 11, 16} at m=3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
615
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // Sweeps n in {16, 24} and k in {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
632
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
651
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, small_kernel) {
  // Sweeps k in {1, 6, 11, 16} at m=3, n=8 with ks set to 3.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
666
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  // Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with ks=3; one iteration per shape.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
686
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3 with ks set to 3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
703
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  // Sweeps n in {16, 24} and k in {1, 6, 11, 16} at m=3 with ks set to 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
720
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with cm_stride=11; one iteration per shape.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
740
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, a_offset) {
  // Sweeps k in {1, 6, 11, 16} at m=3, n=8 with ks=3 and a_offset set to 67.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
  }
}
756
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, zero) {
  // Sweeps k in {1, 6, 11, 16} and zero_index mz in [0, 2] at m=3, n=8 with ks=3, a_offset=67.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(67)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
775
TEST(F32_IGEMM_3X8__WASMSIMD_SPLAT, strided_cm) {
  // m=3, n=8, k=4 with cm_stride explicitly set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmsimd_splat);
}
788 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
789
790
791 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_eq_4) {
  // Single run at m=3, n=8 (equal to mr/nr) with k=4.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
}
803
TEST(F32_IGEMM_3X8S4__WASMSIMD, strided_cn) {
  // m=3, n=8, k=4 with cn_stride explicitly set to 11 (greater than nr=8).
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
}
816
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_eq_4_subtile) {
  // Sweeps n in [1, 8] and m in [1, 3] at k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
833
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_eq_4_subtile_m) {
  // Sweeps m in [1, 3] with n=8 and k=4; one iteration per shape.
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
848
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_eq_4_subtile_n) {
  // Sweeps n in [1, 8] with m=3 and k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
863
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_lt_4) {
  // Sweeps k in [1, 3] at m=3, n=8.
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
877
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_lt_4_subtile) {
  // Sweeps k in [1, 3], n in [1, 8] and m in [1, 3]; one iteration per shape.
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
896
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_gt_4) {
  // Sweeps k in [5, 7] at m=3, n=8.
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
910
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_gt_4_subtile) {
  // Sweeps k in [5, 7], n in [1, 8] and m in [1, 3]; one iteration per shape.
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
929
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_div_4) {
  // Sweeps k over multiples of 4 from 8 to 40 at m=3, n=8.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
943
TEST(F32_IGEMM_3X8S4__WASMSIMD, k_div_4_subtile) {
  // Sweeps k over multiples of 4 from 8 to 40, n in [1, 8], m in [1, 3]; one iteration per shape.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
962
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_gt_8) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
978
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_gt_8_strided_cn) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
995
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_gt_8_subtile) {
  // Sweeps n in [9, 15], k in {1, 6, 11, 16} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1014
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_div_8) {
  // Sweeps n in {16, 24} (multiples of nr=8) and k in {1, 6, 11, 16} at m=3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1030
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_div_8_strided_cn) {
  // Sweeps n in {16, 24} and k in {1, 6, 11, 16} at m=3 with cn_stride set to 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1047
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_div_8_subtile) {
  // Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3]; one iteration per shape.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1066
TEST(F32_IGEMM_3X8S4__WASMSIMD, small_kernel) {
  // Sweeps k in {1, 6, 11, 16} at m=3, n=8 with ks set to 3.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
1081
TEST(F32_IGEMM_3X8S4__WASMSIMD, small_kernel_subtile) {
  // Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with ks=3; one iteration per shape.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1101
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_gt_8_small_kernel) {
  // Sweeps n in [9, 15] and k in {1, 6, 11, 16} at m=3 with ks set to 3.
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1118
TEST(F32_IGEMM_3X8S4__WASMSIMD, n_div_8_small_kernel) {
  // Sweeps n in {16, 24} and k in {1, 6, 11, 16} at m=3 with ks set to 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1135
TEST(F32_IGEMM_3X8S4__WASMSIMD, strided_cm_subtile) {
  // Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with cm_stride=11; one iteration per shape.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1155
TEST(F32_IGEMM_3X8S4__WASMSIMD, a_offset) {
  // Sweeps k in {1, 6, 11, 16} at m=3, n=8 with ks=3 and a_offset set to 67.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
  }
}
1171
TEST(F32_IGEMM_3X8S4__WASMSIMD, zero) {
  // Sweeps k in {1, 6, 11, 16} and zero_index mz in [0, 2] at m=3, n=8 with ks=3, a_offset=67.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(67)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1190
TEST(F32_IGEMM_3X8S4__WASMSIMD, strided_cm) {
  // m=3, n=8, k=4 with cm_stride explicitly set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmsimd);
}
1203 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1204
1205
1206 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_eq_4) {
  // Single run at m=4, n=2 (equal to mr/nr) with k=4.
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
}
1218
TEST(F32_IGEMM_4X2C4__WASMSIMD, strided_cn) {
  // m=4, n=2, k=4 with cn_stride explicitly set to 5 (greater than nr=2).
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
}
1231
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_eq_4_subtile) {
  // Sweeps n in [1, 2] and m in [1, 4] at k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1248
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
  // Sweeps m in [1, 4] with n=2 and k=4; one iteration per shape.
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1263
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
  // Sweeps n in [1, 2] with m=4 and k=4; one iteration per shape.
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1278
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_lt_4) {
  // Sweeps k in [1, 3] at m=4, n=2.
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1292
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_lt_4_subtile) {
  // Sweeps k in [1, 3], n in [1, 2] and m in [1, 4]; one iteration per shape.
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1311
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_gt_4) {
  // Sweeps k in [5, 7] at m=4, n=2.
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1325
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_gt_4_subtile) {
  // Sweeps k in [5, 7], n in [1, 2] and m in [1, 4]; one iteration per shape.
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1344
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_div_4) {
  // Sweeps k over multiples of 4 from 8 to 40 at m=4, n=2.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1358
TEST(F32_IGEMM_4X2C4__WASMSIMD, k_div_4_subtile) {
  // Sweeps k over multiples of 4 from 8 to 40, n in [1, 2], m in [1, 4]; one iteration per shape.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1377
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_gt_2) {
  // The n loop covers only n=3; k sweeps {1, 6, 11, 16} at m=4.
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1393
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
  // The n loop covers only n=3; k sweeps {1, 6, 11, 16} at m=4 with cn_stride set to 5.
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1410
// For n = 3 and k = 1, 6, 11, 16, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_gt_2_subtile) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1429
// Runs m = 4 with n = 4 and n = 6 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_div_2) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1445
// Same sweep as n_div_2 (n = 4, 6; k = 1, 6, 11, 16; m = 4) but with cn_stride(5).
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_div_2_strided_cn) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1462
// For n = 4, 6 and k = 1, 6, 11, 16, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_div_2_subtile) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1481
// Runs m = 4, n = 2 with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMSIMD, small_kernel) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1496
// With ks(3), visits every (m, n) pair with m in [1, 4] and n in [1, 2] for
// k = 1, 6, 11, 16, with iterations(1) per configuration.
TEST(F32_IGEMM_4X2C4__WASMSIMD, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1516
// Runs m = 4, n = 3 with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_gt_2_small_kernel) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1533
// Runs m = 4 with n = 4, 6 and ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMSIMD, n_div_2_small_kernel) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1550
// With cm_stride(5), visits every (m, n) pair with m in [1, 4] and n in [1, 2]
// for k = 1, 6, 11, 16, with iterations(1) per configuration.
TEST(F32_IGEMM_4X2C4__WASMSIMD, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1570
// Runs m = 4, n = 2 with ks(3) and a_offset(83) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMSIMD, a_offset) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kk)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
  }
}
1586
// Runs m = 4, n = 2 with ks(3) and a_offset(83) for k = 1, 6, 11, 16, passing
// each index 0..3 to zero_index().
TEST(F32_IGEMM_4X2C4__WASMSIMD, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kk)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1605
// Single configuration: m = 4, n = 2, k = 4 with cm_stride(5).
TEST(F32_IGEMM_4X2C4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmsimd);
}
1618 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1619
1620
1621 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 4, n = 8, k = 1.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
}
1633
// Single configuration: m = 4, n = 8, k = 1 with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
}
1646
// With k = 1, visits every (m, n) pair with m in [1, 4] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1663
// With k = 1 and n = 8, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1678
// With k = 1 and m = 4, visits every n in [1, 8] with iterations(1).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1693
// Runs m = 4, n = 8 for k = 2..9.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1707
// For k = 2..9, visits every (m, n) pair with m in [1, 4] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1726
// Runs m = 4 with n = 9..15 for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1742
// Runs m = 4 with n = 9..15 for k = 1, 3, 5, with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1759
// For n = 9..15 and k = 1, 3, 5, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1778
// Runs m = 4 with n = 16 and n = 24 for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1794
// Runs m = 4 with n = 16, 24 for k = 1, 3, 5, with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1811
// For n = 16, 24 and k = 1, 3, 5, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1830
// Runs m = 4, n = 8 with ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1845
// With ks(3), visits every (m, n) pair with m in [1, 4] and n in [1, 8] for
// k = 1, 3, 5, with iterations(1) per configuration.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1865
// Runs m = 4 with n = 9..15 and ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1882
// Runs m = 4 with n = 16, 24 and ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1899
// With cm_stride(11), visits every (m, n) pair with m in [1, 4] and n in [1, 8]
// for k = 1, 3, 5, with iterations(1) per configuration.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1919
// Runs m = 4, n = 8 with ks(3) and a_offset(23) for k = 1, 3, 5.
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1935
// Runs m = 4, n = 8 with ks(3) and a_offset(23) for k = 1, 3, 5, passing each
// index 0..3 to zero_index().
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kk)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1954
// Single configuration: m = 4, n = 8, k = 1 with cm_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_loadsplat);
}
1967 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1968
1969
1970 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 4, n = 8, k = 4.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
}
1982
// Single configuration: m = 4, n = 8, k = 4 with cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
}
1995
// With k = 4, visits every (m, n) pair with m in [1, 4] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2012
// With k = 4 and n = 8, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2027
// With k = 4 and m = 4, visits every n in [1, 8] with iterations(1).
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2042
// Runs m = 4, n = 8 for k = 1..3.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_lt_4) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2056
// For k = 1..3, visits every (m, n) pair with m in [1, 4] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2075
// Runs m = 4, n = 8 for k = 5..7.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_gt_4) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2089
// For k = 5..7, visits every (m, n) pair with m in [1, 4] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2108
// Runs m = 4, n = 8 for k = 8, 12, ..., 40 (step of 4).
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2122
// For k = 8, 12, ..., 40, visits every (m, n) pair with m in [1, 4] and
// n in [1, 8], with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2141
// Runs m = 4 with n = 9..15 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2157
// Runs m = 4 with n = 9..15 for k = 1, 6, 11, 16, with cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2174
// For n = 9..15 and k = 1, 6, 11, 16, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2193
// Runs m = 4 with n = 16 and n = 24 for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2209
// Runs m = 4 with n = 16, 24 for k = 1, 6, 11, 16, with cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2226
// For n = 16, 24 and k = 1, 6, 11, 16, visits every m in [1, 4] with iterations(1).
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2245
// Runs m = 4, n = 8 with ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, small_kernel) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2260
// With ks(3), visits every (m, n) pair with m in [1, 4] and n in [1, 8] for
// k = 1, 6, 11, 16, with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2280
// Runs m = 4 with n = 9..15 and ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2297
// Runs m = 4 with n = 16, 24 and ks(3) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2314
// With cm_stride(11), visits every (m, n) pair with m in [1, 4] and n in [1, 8]
// for k = 1, 6, 11, 16, with iterations(1) per configuration.
TEST(F32_IGEMM_4X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2334
// Runs m = 4, n = 8 with ks(3) and a_offset(83) for k = 1, 6, 11, 16.
TEST(F32_IGEMM_4X8S4__WASMSIMD, a_offset) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
  }
}
2350
// Runs m = 4, n = 8 with ks(3) and a_offset(83) for k = 1, 6, 11, 16, passing
// each index 0..3 to zero_index().
TEST(F32_IGEMM_4X8S4__WASMSIMD, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kk)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2369
// Single configuration: m = 4, n = 8, k = 4 with cm_stride(11).
TEST(F32_IGEMM_4X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmsimd);
}
2382 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2383
2384
2385 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 5, n = 8, k = 1.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
}
2397
// Single configuration: m = 5, n = 8, k = 1 with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
}
2410
// With k = 1, visits every (m, n) pair with m in [1, 5] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 5; ++mm) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2427
// With k = 1 and n = 8, visits every m in [1, 5] with iterations(1).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 5; ++mm) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2442
// With k = 1 and m = 5, visits every n in [1, 8] with iterations(1).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2457
// Runs m = 5, n = 8 for k = 2..9.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2471
// For k = 2..9, visits every (m, n) pair with m in [1, 5] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2490
// Runs m = 5 with n = 9..15 for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2506
// Runs m = 5 with n = 9..15 for k = 1, 3, 5, with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2523
// For n = 9..15 and k = 1, 3, 5, visits every m in [1, 5] with iterations(1).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2542
// Runs m = 5 with n = 16 and n = 24 for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2558
// Runs m = 5 with n = 16, 24 for k = 1, 3, 5, with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2575
// For n = 16, 24 and k = 1, 3, 5, visits every m in [1, 5] with iterations(1).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2594
// Runs m = 5, n = 8 with ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2609
// With ks(3), visits every (m, n) pair with m in [1, 5] and n in [1, 8] for
// k = 1, 3, 5, with iterations(1) per configuration.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2629
// Runs m = 5 with n = 9..15 and ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2646
// Runs m = 5 with n = 16, 24 and ks(3) for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2663
// With cm_stride(11), visits every (m, n) pair with m in [1, 5] and n in [1, 8]
// for k = 1, 3, 5, with iterations(1) per configuration.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 5; ++mm) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2683
// Runs m = 5, n = 8 with ks(3) and a_offset(29) for k = 1, 3, 5.
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kk)
      .ks(3)
      .a_offset(29)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2699
// Runs m = 5, n = 8 with ks(3) and a_offset(29) for k = 1, 3, 5, passing each
// index 0..4 to zero_index().
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kk)
        .ks(3)
        .a_offset(29)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2718
// Single configuration: m = 5, n = 8, k = 1 with cm_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat);
}
2731 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2732
2733
2734 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m = 5, n = 8, k = 4.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
}
2746
// Single configuration: m = 5, n = 8, k = 4 with cn_stride(11).
TEST(F32_IGEMM_5X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
}
2759
// With k = 4, visits every (m, n) pair with m in [1, 5] and n in [1, 8],
// with iterations(1) per configuration.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 5; ++mm) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
2776
// k = 4 with n = 8 fixed and m swept over [1, 5]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 5; m++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
2791
// k = 4 with m = 5 fixed and n swept over [1, 8]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
2806
// Full 5x8 tile with k in [1, 3].
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
2820
// k in [1, 3] for every m in [1, 5] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2839
// Full 5x8 tile with k in [5, 7].
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
2853
// k in [5, 7] for every m in [1, 5] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2872
// Full 5x8 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
2886
// k = 8, 12, ..., 40 for every m in [1, 5] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2905
// m = 5 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
2921
// m = 5 with n in [9, 15], k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
2938
// n in [9, 15], k in {1, 6, 11, 16} and m in [1, 5]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2957
// m = 5 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
2973
// m = 5 with n in {16, 24}, k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
2990
// n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 5]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3009
// Full 5x8 tile with ks = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
3024
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 5]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3044
// ks = 3 with m = 5, n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3061
// ks = 3 with m = 5, n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3078
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 5]; one iteration per shape.
TEST(F32_IGEMM_5X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3098
// Full 5x8 tile with ks = 3 and a_offset = 103, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
  }
}
3114
// Full 5x8 tile with ks = 3 and a_offset = 103, sweeping zero_index over [0, 4] for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_5X8S4__WASMSIMD, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 5; mz++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(103)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3133
// Full 5x8 tile with k = 4 and cm_stride = 11.
TEST(F32_IGEMM_5X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(5)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmsimd);
}
3146 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3147
3148
3149 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 6x8 tile with k = 4.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
}
3161
// Full 6x8 tile with k = 4 and cn_stride = 11.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
}
3174
// k = 4 for every m in [1, 6] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3191
// k = 4 with n = 8 fixed and m swept over [1, 6]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3206
// k = 4 with m = 6 fixed and n swept over [1, 8]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3221
// Full 6x8 tile with k in [1, 3].
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3235
// k in [1, 3] for every m in [1, 6] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3254
// Full 6x8 tile with k in [5, 7].
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3268
// k in [5, 7] for every m in [1, 6] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3287
// Full 6x8 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3301
// k = 8, 12, ..., 40 for every m in [1, 6] and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3320
// m = 6 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3336
// m = 6 with n in [9, 15], k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3353
// n in [9, 15], k in {1, 6, 11, 16} and m in [1, 6]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3372
// m = 6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3388
// m = 6 with n in {16, 24}, k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3405
// n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 6]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3424
// Full 6x8 tile with ks = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3439
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 6]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3459
// ks = 3 with m = 6, n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3476
// ks = 3 with m = 6, n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3493
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 6]; one iteration per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3513
// Full 6x8 tile with ks = 3 and a_offset = 127, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
  }
}
3529
// Full 6x8 tile with ks = 3 and a_offset = 127, sweeping zero_index over [0, 5] for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3548
// Full 6x8 tile with k = 4 and cm_stride = 11.
TEST(F32_IGEMM_6X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_splat);
}
3561 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3562
3563
3564 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 1x8 tile with k = 4.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3576
// Full 1x8 tile with k = 4 and cn_stride = 11.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3589
// k = 4 for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3606
// k = 4 with n = 8 fixed and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3621
// k = 4 with m = 1 fixed and n swept over [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3636
// Full 1x8 tile with k in [1, 3].
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3650
// k in [1, 3] for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3669
// Full 1x8 tile with k in [5, 7].
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3683
// k in [5, 7] for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3702
// Full 1x8 tile with k = 8, 12, ..., 40.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3716
// k = 8, 12, ..., 40 for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3735
// m = 1 with n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3751
// m = 1 with n in [9, 15], k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3768
// n in [9, 15], k in {1, 6, 11, 16} and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3787
// m = 1 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3803
// m = 1 with n in {16, 24}, k in {1, 6, 11, 16} and cn_stride = 11.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3820
// n in {16, 24}, k in {1, 6, 11, 16} and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3839
// Full 1x8 tile with ks = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3854
// ks = 3 with k in {1, 6, 11, 16}, n in [1, 8] and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3874
// ks = 3 with m = 1, n in [9, 15] and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3891
// ks = 3 with m = 1, n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3908
// cm_stride = 11 with k in {1, 6, 11, 16}, n in [1, 8] and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3928
// Full 1x8 tile with ks = 3 and a_offset = 23, for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3944
// Full 1x8 tile with ks = 3 and a_offset = 23, with zero_index = 0 (the mz loop spans [0, 0]) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3963
// Full 1x8 tile with k = 4 and cm_stride = 11.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3976 #endif // XNN_ARCH_WASMRELAXEDSIMD
3977
3978
3979 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 1x8 tile with k = 4.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3991
// Full 1x8 tile with k = 4 and cn_stride = 11.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
4004
// k = 4 for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4021
// k = 4 with n = 8 fixed and m = 1 (the m loop spans [1, 1]); one iteration per shape.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4036
// k = 4 with m = 1 fixed and n swept over [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4051
// Full 1x8 tile with k in [1, 3].
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4065
// k in [1, 3] for m = 1 (the m loop spans [1, 1]) and n in [1, 8]; one iteration per shape.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4084
// Full 1x8 tile with k in [5, 7].
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4098
// k in 5..7 crossed with n in 1..8 and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4117
// Full m = 1, n = 8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4131
// k = 8, 12, ..., 40 crossed with n in 1..8 and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4150
// n in 9..15 with m = 1; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4166
// n in 9..15 with m = 1, k in {1, 6, 11, 16}, and cn_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4183
// n in 9..15 crossed with k in {1, 6, 11, 16} and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4202
// n = 16 and 24 (multiples of 8) with m = 1; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4218
// n = 16 and 24 with m = 1, k in {1, 6, 11, 16}, and cn_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4235
// n = 16 and 24 crossed with k in {1, 6, 11, 16} and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4254
// Full m = 1, n = 8 tile with ks set to 3; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4269
// ks = 3 with k in {1, 6, 11, 16}, n in 1..8 and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4289
// ks = 3 with n in 9..15, m = 1 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4306
// ks = 3 with n = 16 and 24, m = 1 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4323
// cm_stride = 11 with k in {1, 6, 11, 16}, n in 1..8 and m in 1..1, one iteration per case.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4343
// Full m = 1, n = 8 tile with ks = 3 and a_offset = 23; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
4359
// Same setup as a_offset (ks = 3, a_offset = 23), additionally setting zero_index to 0.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4378
// Single full-tile case (m = 1, n = 8, k = 4) with cm_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(4)
    .m(1).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
4391 #endif // XNN_ARCH_WASMRELAXEDSIMD
4392
4393
4394 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile case: m = 3, n = 8, k = 4.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
4406
// Single full-tile case (m = 3, n = 8, k = 4) with cn_stride set to 11.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
4419
// k = 4 with n in 1..8 crossed with m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4436
// k = 4, n = 8; m is swept over 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4451
// k = 4, m = 3; n is swept over 1..8, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4466
// Full m = 3, n = 8 tile with k swept over 1..3.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4480
// k in 1..3 crossed with n in 1..8 and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4499
// Full m = 3, n = 8 tile with k swept over 5..7.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4513
// k in 5..7 crossed with n in 1..8 and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4532
// Full m = 3, n = 8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4546
// k = 8, 12, ..., 40 crossed with n in 1..8 and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4565
// n in 9..15 with m = 3; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4581
// n in 9..15 with m = 3, k in {1, 6, 11, 16}, and cn_stride set to 11.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4598
// n in 9..15 crossed with k in {1, 6, 11, 16} and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4617
// n = 16 and 24 (multiples of 8) with m = 3; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4633
// n = 16 and 24 with m = 3, k in {1, 6, 11, 16}, and cn_stride set to 11.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4650
// n = 16 and 24 crossed with k in {1, 6, 11, 16} and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4669
// Full m = 3, n = 8 tile with ks set to 3; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4684
// ks = 3 with k in {1, 6, 11, 16}, n in 1..8 and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4704
// ks = 3 with n in 9..15, m = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4721
// ks = 3 with n = 16 and 24, m = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4738
// cm_stride = 11 with k in {1, 6, 11, 16}, n in 1..8 and m in 1..3, one iteration per case.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4758
// Full m = 3, n = 8 tile with ks = 3 and a_offset = 67; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4774
// Same setup as a_offset (ks = 3, a_offset = 67), with zero_index swept over 0..2.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(67)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4793
// Single full-tile case (m = 3, n = 8, k = 4) with cm_stride set to 11.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
4806 #endif // XNN_ARCH_WASMRELAXEDSIMD
4807
4808
4809 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile case: m = 4, n = 2, k = 4.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4821
// Single full-tile case (m = 4, n = 2, k = 4) with cn_stride set to 5.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4834
// k = 4 with n in 1..2 crossed with m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4851
// k = 4, n = 2; m is swept over 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(mc).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4866
// k = 4, m = 4; n is swept over 1..2, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4881
// Full m = 4, n = 2 tile with k swept over 1..3.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4895
// k in 1..3 crossed with n in 1..2 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4914
// Full m = 4, n = 2 tile with k swept over 5..7.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4928
// k in 5..7 crossed with n in 1..2 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4947
// Full m = 4, n = 2 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4961
// k = 8, 12, ..., 40 crossed with n in 1..2 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4980
// n takes the single value 3 with m = 4; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4996
// n = 3 with m = 4, k in {1, 6, 11, 16}, and cn_stride set to 5.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5013
// n = 3 crossed with k in {1, 6, 11, 16} and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5032
// n = 4 and 6 (multiples of 2) with m = 4; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5048
// n = 4 and 6 with m = 4, k in {1, 6, 11, 16}, and cn_stride set to 5.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5065
// n = 4 and 6 crossed with k in {1, 6, 11, 16} and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5084
// Full m = 4, n = 2 tile with ks set to 3; k takes the values 1, 6, 11, 16.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
5099
// ks = 3 with k in {1, 6, 11, 16}, n in 1..2 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5119
// ks = 3 with n = 3, m = 4 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5136
// ks = 3 with n = 4 and 6, m = 4 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5153
// cm_stride = 5 with k in {1, 6, 11, 16}, n in 1..2 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5173
// Full m = 4, n = 2 tile with ks = 3 and a_offset = 83; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
5189
// Same setup as a_offset (ks = 3, a_offset = 83), with zero_index swept over 0..3.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
5208
// Single full-tile case (m = 4, n = 2, k = 4) with cm_stride set to 5.
TEST(F32_IGEMM_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
5221 #endif // XNN_ARCH_WASMRELAXEDSIMD
5222
5223
5224 #if XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile case: m = 4, n = 8, k = 4.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
5236
// Single full-tile case (m = 4, n = 8, k = 4) with cn_stride set to 11.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
5249
// k = 4 with n in 1..8 crossed with m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5266
// k = 4, n = 8; m is swept over 1..4, one iteration per case.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5281
// k = 4, m = 4; n is swept over 1..8, one iteration per case.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5296
// Full m = 4, n = 8 tile with k swept over 1..3.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5310
// k in 1..3 crossed with n in 1..8 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5329
// Full m = 4, n = 8 tile with k swept over 5..7.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5343
// k in 5..7 crossed with n in 1..8 and m in 1..4, one iteration per case.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5362
// Full m = 4, n = 8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5376
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n = 1..8 and m = 1..4; iterations set to 1.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5395
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 4.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5411
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 4, cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5428
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  // n = 9..15, k in {1, 6, 11, 16}, m = 1..4; iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5447
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 4.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5463
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 4, cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5480
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 1..4; iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5499
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  // k in {1, 6, 11, 16} with m = 4, n = 8 and ks set to 3.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5514
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..4; ks set to 3, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5534
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 4, ks set to 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5551
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 4, ks set to 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5568
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..4; cm_stride set to 11, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5588
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  // k in {1, 6, 11, 16} with m = 4, n = 8, ks set to 3 and a_offset set to 83.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5604
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  // k in {1, 6, 11, 16} crossed with zero_index = 0..3; ks set to 3, a_offset set to 83.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5623
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  // Single configuration: m = 4, n = 8, k = 4 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
5636 #endif // XNN_ARCH_WASMRELAXEDSIMD
5637
5638
5639 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  // Single configuration: m = 5, n = 8, k = 4.
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5651
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  // Single configuration: m = 5, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5664
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  // n = 1..8 crossed with m = 1..5 at k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5681
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  // m = 1..5 with n = 8 and k = 4; iterations set to 1.
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5696
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  // n = 1..8 with m = 5 and k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5711
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  // k = 1, 2, 3 with m = 5 and n = 8.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5725
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  // k = 1..3 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5744
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  // k = 5, 6, 7 with m = 5 and n = 8.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5758
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  // k = 5..7 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5777
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  // k = 8, 12, ..., 40 (step 4) with m = 5 and n = 8.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5791
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5810
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5826
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5, cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5843
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  // n = 9..15, k in {1, 6, 11, 16}, m = 1..5; iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5862
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5878
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5, cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5895
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 1..5; iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5914
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  // k in {1, 6, 11, 16} with m = 5, n = 8 and ks set to 3.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5929
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..5; ks set to 3, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5949
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5, ks set to 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5966
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5, ks set to 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5983
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..5; cm_stride set to 11, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6003
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  // k in {1, 6, 11, 16} with m = 5, n = 8, ks set to 3 and a_offset set to 103.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
6019
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  // k in {1, 6, 11, 16} crossed with zero_index = 0..4; ks set to 3, a_offset set to 103.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(103)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6038
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  // Single configuration: m = 5, n = 8, k = 4 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
6051 #endif // XNN_ARCH_WASMRELAXEDSIMD
6052
6053
6054 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Single configuration: m = 5, n = 8, k = 4 (sr = 4).
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
6066
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  // Single configuration: m = 5, n = 8, k = 4 with cn_stride set to 11 (sr = 4).
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
6079
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  // n = 1..8 crossed with m = 1..5 at k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6096
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  // m = 1..5 with n = 8 and k = 4; iterations set to 1.
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6111
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  // n = 1..8 with m = 5 and k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6126
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  // k = 1, 2, 3 with m = 5 and n = 8.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6140
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  // k = 1..3 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6159
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  // k = 5, 6, 7 with m = 5 and n = 8.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6173
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  // k = 5..7 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6192
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  // k = 8, 12, ..., 40 (step 4) with m = 5 and n = 8.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6206
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n = 1..8 and m = 1..5; iterations set to 1.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6225
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6241
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5, cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6258
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  // n = 9..15, k in {1, 6, 11, 16}, m = 1..5; iterations set to 1.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6277
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6293
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5, cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6310
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  // n in {16, 24}, k in {1, 6, 11, 16}, m = 1..5; iterations set to 1.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6329
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  // k in {1, 6, 11, 16} with m = 5, n = 8 and ks set to 3.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6344
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..5; ks set to 3, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6364
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 5, ks set to 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6381
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  // n in {16, 24} crossed with k in {1, 6, 11, 16}; m = 5, ks set to 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6398
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  // k in {1, 6, 11, 16}, n = 1..8, m = 1..5; cm_stride set to 11, iterations set to 1.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6418
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  // k in {1, 6, 11, 16} with m = 5, n = 8, ks set to 3 and a_offset set to 103.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(kc)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6434
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, zero) {
  // k in {1, 6, 11, 16} crossed with zero_index = 0..4; ks set to 3, a_offset set to 103.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(103)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6453
TEST(F32_IGEMM_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  // Single configuration: m = 5, n = 8, k = 4 with cm_stride set to 11 (sr = 4).
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
6466 #endif // XNN_ARCH_WASMRELAXEDSIMD
6467
6468
6469 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  // Single configuration: m = 6, n = 8, k = 4.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6481
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  // Single configuration: m = 6, n = 8, k = 4 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6494
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  // n = 1..8 crossed with m = 1..6 at k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6511
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  // m = 1..6 with n = 8 and k = 4; iterations set to 1.
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6526
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  // n = 1..8 with m = 6 and k = 4; iterations set to 1.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6541
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  // k = 1, 2, 3 with m = 6 and n = 8.
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6555
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  // k = 1..3 crossed with n = 1..8 and m = 1..6; iterations set to 1.
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6574
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  // k = 5, 6, 7 with m = 6 and n = 8.
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6588
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  // k = 5..7 crossed with n = 1..8 and m = 1..6; iterations set to 1.
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6607
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  // k = 8, 12, ..., 40 (step 4) with m = 6 and n = 8.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6621
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n = 1..8 and m = 1..6; iterations set to 1.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6640
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  // n = 9..15 crossed with k in {1, 6, 11, 16}; m = 6.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6656
// m=6 with n swept over 9..15 and k over 1, 6, 11, 16, with cn_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6673
// n in 9..15, k in {1, 6, 11, 16}, and every m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6692
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6708
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6725
// n in {16, 24}, k in {1, 6, 11, 16}, and every m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6744
// m=6, n=8 with ks(3), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6759
// ks(3) with every (m, n), m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}; iterations(1) each.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6779
// ks(3) at m=6 with n swept over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6796
// ks(3) at m=6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6813
// cm_stride(11) with every (m, n), m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}; iterations(1) each.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6833
// m=6, n=8 with ks(3) and a_offset(127), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6849
// m=6, n=8 with ks(3) and a_offset(127); zero_index swept over 0..5 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6868
// Single configuration: m=6, n=8, k=4 with cm_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6881 #endif // XNN_ARCH_WASMRELAXEDSIMD
6882
6883
6884 #if XNN_ARCH_WASMRELAXEDSIMD
// Single configuration: m=6, n=8, k=4 (sr=4 variant).
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6896
// Single configuration: m=6, n=8, k=4 with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6909
// k=4 with every (m, n), m in [1, 6] and n in [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6926
// k=4, n=8 with m swept over [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6941
// k=4, m=6 with n swept over [1, 8]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6956
// m=6, n=8 with k swept over 1, 2, 3.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6970
// Every (m, n) with m in [1, 6] and n in [1, 8], for k = 1, 2, 3; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6989
// m=6, n=8 with k swept over 5, 6, 7.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
7003
// Every (m, n) with m in [1, 6] and n in [1, 8], for k = 5, 6, 7; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7022
// m=6, n=8 with k swept over the multiples of 4 from 8 to 40 inclusive.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
7036
// Every (m, n) with m in [1, 6] and n in [1, 8], for k = 8, 12, ..., 40; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7055
// m=6 with n swept over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7071
// m=6 with n swept over 9..15 and k over 1, 6, 11, 16, with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7088
// n in 9..15, k in {1, 6, 11, 16}, and every m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7107
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7123
// m=6 with n in {16, 24} and k in {1, 6, 11, 16}, with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7140
// n in {16, 24}, k in {1, 6, 11, 16}, and every m in [1, 6]; iterations(1) per configuration.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7159
// m=6, n=8 with ks(3), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
7174
// ks(3) with every (m, n), m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}; iterations(1) each.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7194
// ks(3) at m=6 with n swept over 9..15 and k over 1, 6, 11, 16.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7211
// ks(3) at m=6 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7228
// cm_stride(11) with every (m, n), m in [1, 6] and n in [1, 8], for k in {1, 6, 11, 16}; iterations(1) each.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
7248
// m=6, n=8 with ks(3) and a_offset(127), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
7264
// m=6, n=8 with ks(3) and a_offset(127); zero_index swept over 0..5 for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
7283
// Single configuration: m=6, n=8, k=4 with cm_stride(11).
TEST(F32_IGEMM_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
7296 #endif // XNN_ARCH_WASMRELAXEDSIMD
7297
7298
// Single configuration: m=1, n=4, k=1.
TEST(F32_IGEMM_1X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_f32_igemm_ukernel_1x4__scalar);
}
7310
// Single configuration: m=1, n=4, k=1 with cn_stride(7).
TEST(F32_IGEMM_1X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_ukernel_1x4__scalar);
}
7323
// k=1 with n swept over [1, 4]; the m loop covers only m=1. iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7340
// k=1, n=4; the m loop covers only m=1. iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x4__scalar);
  }
}
7355
// k=1, m=1 with n swept over [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x4__scalar);
  }
}
7370
// m=1, n=4 with k swept over 2..9.
TEST(F32_IGEMM_1X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x4__scalar);
  }
}
7384
// k in 2..9 with n swept over [1, 4]; the m loop covers only m=1. iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x4__scalar);
      }
    }
  }
}
7403
// m=1 with n in {5, 6, 7} and k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7419
// m=1 with n in {5, 6, 7} and k in {1, 3, 5}, with cn_stride(7).
TEST(F32_IGEMM_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7436
// n in {5, 6, 7} and k in {1, 3, 5}; the m loop covers only m=1. iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x4__scalar);
      }
    }
  }
}
7455
// m=1 with n in {8, 12} and k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7471
// m=1 with n in {8, 12} and k in {1, 3, 5}, with cn_stride(7).
TEST(F32_IGEMM_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7488
// n in {8, 12} and k in {1, 3, 5}; the m loop covers only m=1. iterations(1) per configuration.
TEST(F32_IGEMM_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x4__scalar);
      }
    }
  }
}
7507
// m=1, n=4 with ks(3), for k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x4__scalar);
  }
}
7522
// ks(3) with n swept over [1, 4] for k in {1, 3, 5}; the m loop covers only m=1. iterations(1) each.
TEST(F32_IGEMM_1X4__SCALAR, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x4__scalar);
      }
    }
  }
}
7542
// ks(3) at m=1 with n in {5, 6, 7} and k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7559
// ks(3) at m=1 with n in {8, 12} and k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7576
// cm_stride(7) with n swept over [1, 4] for k in {1, 3, 5}; the m loop covers only m=1. iterations(1) each.
TEST(F32_IGEMM_1X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x4__scalar);
      }
    }
  }
}
7596
// m=1, n=4 with ks(3) and a_offset(7), for k in {1, 3, 5}.
TEST(F32_IGEMM_1X4__SCALAR, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_ukernel_1x4__scalar);
  }
}
7612
// m=1, n=4 with ks(3) and a_offset(7), for k in {1, 3, 5}; the mz loop covers only zero_index 0.
TEST(F32_IGEMM_1X4__SCALAR, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_1x4__scalar);
    }
  }
}
7631
// Single configuration: m=1, n=4, k=1 with cm_stride(7).
TEST(F32_IGEMM_1X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_f32_igemm_ukernel_1x4__scalar);
}
7644
7645
// Single configuration: m=4, n=2, k=1.
TEST(F32_IGEMM_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_f32_igemm_ukernel_4x2__scalar);
}
7657
// Single configuration: m=4, n=2, k=1 with cn_stride(5).
TEST(F32_IGEMM_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2__scalar);
}
7670
// k=1 with every (m, n), m in [1, 4] and n in [1, 2]; iterations(1) per configuration.
TEST(F32_IGEMM_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7687
// k=1, n=2 with m swept over [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2__scalar);
  }
}
7702
// k=1, m=4 with n swept over [1, 2]; iterations(1) per configuration.
TEST(F32_IGEMM_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x2__scalar);
  }
}
7717
// m=4, n=2 with k swept over 2..9.
TEST(F32_IGEMM_4X2__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_4x2__scalar);
  }
}
7731
// Every (m, n) with m in [1, 4] and n in [1, 2], for k in 2..9; iterations(1) per configuration.
TEST(F32_IGEMM_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2__scalar);
      }
    }
  }
}
7750
// m=4 with k in {1, 3, 5}; the n loop (3 <= n < 4) covers only n=3.
TEST(F32_IGEMM_4X2__SCALAR, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7766
// m=4 with k in {1, 3, 5} and cn_stride(5); the n loop (3 <= n < 4) covers only n=3.
TEST(F32_IGEMM_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7783
// n=3 (the only value of the n loop), k in {1, 3, 5}, and every m in [1, 4]; iterations(1) each.
TEST(F32_IGEMM_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2__scalar);
      }
    }
  }
}
7802
// m=4 with n in {4, 6} and k in {1, 3, 5}.
TEST(F32_IGEMM_4X2__SCALAR, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7818
// m=4 with n in {4, 6} and k in {1, 3, 5}, with cn_stride(5).
TEST(F32_IGEMM_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7835
// n in {4, 6}, k in {1, 3, 5}, and every m in [1, 4]; iterations(1) per configuration.
TEST(F32_IGEMM_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2__scalar);
      }
    }
  }
}
7854
// m=4, n=2 with ks(3), for k in {1, 3, 5}.
TEST(F32_IGEMM_4X2__SCALAR, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x2__scalar);
  }
}
7869
// ks(3) with every (m, n), m in [1, 4] and n in [1, 2], for k in {1, 3, 5}; iterations(1) each.
TEST(F32_IGEMM_4X2__SCALAR, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2__scalar);
      }
    }
  }
}
7889
// ks(3) at m=4 with k in {1, 3, 5}; the n loop (3 <= n < 4) covers only n=3.
TEST(F32_IGEMM_4X2__SCALAR, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7906
// ks(3) at m=4 with n in {4, 6} and k in {1, 3, 5}.
TEST(F32_IGEMM_4X2__SCALAR, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7923
// cm_stride(5) with every (m, n), m in [1, 4] and n in [1, 2], for k in {1, 3, 5}; iterations(1) each.
TEST(F32_IGEMM_4X2__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x2__scalar);
      }
    }
  }
}
7943
// m=4, n=2 with ks(3) and a_offset(23), for k in {1, 3, 5}.
TEST(F32_IGEMM_4X2__SCALAR, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_4x2__scalar);
  }
}
7959
// m=4, n=2 with ks(3) and a_offset(23); zero_index swept over 0..3 for k in {1, 3, 5}.
TEST(F32_IGEMM_4X2__SCALAR, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_4x2__scalar);
    }
  }
}
7978
// Single configuration: m=4, n=2, k=1 with cm_stride(5).
TEST(F32_IGEMM_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_f32_igemm_ukernel_4x2__scalar);
}
7991
7992
// Single configuration: m=4, n=4, k=1.
TEST(F32_IGEMM_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .Test(xnn_f32_igemm_ukernel_4x4__scalar);
}
8004
// Single configuration: m=4, n=4, k=1 with cn_stride(7).
TEST(F32_IGEMM_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_ukernel_4x4__scalar);
}
8017
// Runs xnn_f32_igemm_ukernel_4x4__scalar at k=1 for every (m, n) with
// n in 1..4 (outer loop) and m in 1..4 (inner loop), one tester iteration each.
TEST(F32_IGEMM_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(m).n(n).k(1)           // m/n vary, k fixed
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8034
// Runs xnn_f32_igemm_ukernel_4x4__scalar at n=4, k=1 for m in 1..4,
// one tester iteration each.
TEST(F32_IGEMM_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester tester;
    tester
      .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
      .m(m).n(4).k(1)           // only m varies
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x4__scalar);
  }
}
8049
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4, k=1 for n in 1..4,
// one tester iteration each.
TEST(F32_IGEMM_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester tester;
    tester
      .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
      .m(4).n(n).k(1)           // only n varies
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x4__scalar);
  }
}
8064
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4, n=4 for k in 2..9.
TEST(F32_IGEMM_4X4__SCALAR, k_gt_1) {
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester tester;
    tester
      .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
      .m(4).n(4).k(k)           // only k varies
      .Test(xnn_f32_igemm_ukernel_4x4__scalar);
  }
}
8078
// Runs xnn_f32_igemm_ukernel_4x4__scalar for k in 2..9 over every (m, n)
// with n in 1..4 and m in 1..4, one tester iteration per combination.
TEST(F32_IGEMM_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester
          .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
          .m(m).n(n).k(k)           // m/n/k for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x4__scalar);
      }
    }
  }
}
8097
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in 5..7 (values above
// nr=4) and k in {1, 3, 5}.
TEST(F32_IGEMM_4X4__SCALAR, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8113
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in 5..7 and
// k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_IGEMM_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .cn_stride(7)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8130
// Runs xnn_f32_igemm_ukernel_4x4__scalar for n in 5..7, k in {1, 3, 5} and
// m in 1..4, one tester iteration per combination.
TEST(F32_IGEMM_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester
          .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
          .m(m).n(n).k(k)           // m/n/k for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x4__scalar);
      }
    }
  }
}
8149
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in {8, 12} (multiples
// of nr=4) and k in {1, 3, 5}.
TEST(F32_IGEMM_4X4__SCALAR, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8165
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in {8, 12} and
// k in {1, 3, 5}, with cn_stride set to 7.
TEST(F32_IGEMM_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .cn_stride(7)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8182
// Runs xnn_f32_igemm_ukernel_4x4__scalar for n in {8, 12}, k in {1, 3, 5}
// and m in 1..4, one tester iteration per combination.
TEST(F32_IGEMM_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester
          .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
          .m(m).n(n).k(k)           // m/n/k for this iteration
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x4__scalar);
      }
    }
  }
}
8201
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4, n=4 for k in {1, 3, 5},
// with ks set to 3.
TEST(F32_IGEMM_4X4__SCALAR, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester
      .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
      .m(4).n(4).k(k)           // only k varies
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x4__scalar);
  }
}
8216
// Runs xnn_f32_igemm_ukernel_4x4__scalar for k in {1, 3, 5} over every
// (m, n) with n in 1..4 and m in 1..4, with ks set to 3 and one tester
// iteration per combination.
TEST(F32_IGEMM_4X4__SCALAR, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester
          .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
          .m(m).n(n).k(k)           // m/n/k for this iteration
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x4__scalar);
      }
    }
  }
}
8236
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in 5..7 and
// k in {1, 3, 5}, with ks set to 3.
TEST(F32_IGEMM_4X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8253
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4 for n in {8, 12} and
// k in {1, 3, 5}, with ks set to 3.
TEST(F32_IGEMM_4X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(n).k(k)           // m fixed, n/k vary
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8270
// Runs xnn_f32_igemm_ukernel_4x4__scalar for k in {1, 3, 5} over every
// (m, n) with n in 1..4 and m in 1..4, with cm_stride set to 7 and one tester
// iteration per combination.
TEST(F32_IGEMM_4X4__SCALAR, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester tester;
        tester
          .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
          .m(m).n(n).k(k)           // m/n/k for this iteration
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x4__scalar);
      }
    }
  }
}
8290
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4, n=4 for k in {1, 3, 5},
// with ks set to 3 and a_offset set to 23.
TEST(F32_IGEMM_4X4__SCALAR, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester tester;
    tester
      .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
      .m(4).n(4).k(k)           // only k varies
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_4x4__scalar);
  }
}
8306
// Runs xnn_f32_igemm_ukernel_4x4__scalar at m=4, n=4 for k in {1, 3, 5},
// sweeping zero_index over 0..3 with ks set to 3 and a_offset set to 23.
TEST(F32_IGEMM_4X4__SCALAR, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester tester;
      tester
        .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
        .m(4).n(4).k(k)           // m/n fixed, k varies
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_4x4__scalar);
    }
  }
}
8325
// Runs xnn_f32_igemm_ukernel_4x4__scalar once at m=4, n=4, k=1 with
// cm_stride set to 7.
TEST(F32_IGEMM_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester
    .mr(4).nr(4).kr(1).sr(1)  // mr/nr/kr/sr configuration
    .m(4).n(4).k(1)           // m/n/k under test
    .cm_stride(7)
    .Test(xnn_f32_igemm_ukernel_4x4__scalar);
}
8338