1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-igemm-relu.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=1, n=8, k=4 with no stride, ks or offset overrides.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
}
39
// Single run at m=1, n=8, k=4 with cn_stride overridden to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
}
52
// Sweeps n over 1..8 (outer) and m over 1..1 (inner) at k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
69
// Sweeps m over 1..1 at n=8, k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
84
// Sweeps n over 1..8 at m=1, k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
99
// Sweeps k over 1..3 at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
113
// Sweeps k over 1..3, n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
132
// Sweeps k over 5..7 at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
146
// Sweeps k over 5..7, n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
165
// Sweeps k over 8, 12, ..., 40 (step 4) at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
179
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
198
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) at m=1.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
214
// Sweeps n over 9..15 and k over 1..20 (step 5) at m=1, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
231
// Sweeps n over 9..15, k over 1..20 (step 5) and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
250
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) at m=1.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
266
// Sweeps n over 16 and 24 and k over 1..20 (step 5) at m=1, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
283
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
302
// Sweeps k over 1..20 (step 5) at m=1, n=8 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
317
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with ks=3 and iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
337
// Sweeps n over 9..15 and k over 1..20 (step 5) at m=1 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
354
// Sweeps n over 16 and 24 and k over 1..20 (step 5) at m=1 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
371
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with cm_stride=11 and iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
391
// Sweeps k over 1..20 (step 5) at m=1, n=8 with ks=3 and a_offset=23.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3).a_offset(23)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
407
// Sweeps k over 1..20 (step 5) and zero_index over 0..0 at m=1, n=8 with ks=3 and a_offset=23.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(cur_k)
          .ks(3).a_offset(23).zero_index(cur_mz)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
426
// Single run at m=1, n=8, k=4 with cm_stride overridden to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
}
439 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
440
441
442 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=1, n=8, k=4 (sr=4) with no stride, ks or offset overrides.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
}
454
// Single run at m=1, n=8, k=4 (sr=4) with cn_stride overridden to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
}
467
// Sweeps n over 1..8 (outer) and m over 1..1 (inner) at k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
484
// Sweeps m over 1..1 at n=8, k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(cur_m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
499
// Sweeps n over 1..8 at m=1, k=4, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
514
// Sweeps k over 1..3 at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_lt_4) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
528
// Sweeps k over 1..3, n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
547
// Sweeps k over 5..7 at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_gt_4) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
561
// Sweeps k over 5..7, n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
580
// Sweeps k over 8, 12, ..., 40 (step 4) at m=1, n=8.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_div_4) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
594
// Sweeps k over 8..40 (step 4), n over 1..8 and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
613
// Sweeps n over 9..15 and k over 1, 6, 11, 16 (step 5) at m=1.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
629
// Sweeps n over 9..15 and k over 1..20 (step 5) at m=1, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
646
// Sweeps n over 9..15, k over 1..20 (step 5) and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
665
// Sweeps n over 16 and 24 (step 8) and k over 1..20 (step 5) at m=1.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
681
// Sweeps n over 16 and 24 and k over 1..20 (step 5) at m=1, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
698
// Sweeps n over 16 and 24, k over 1..20 (step 5) and m over 1..1, with iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
717
// Sweeps k over 1..20 (step 5) at m=1, n=8 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
732
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with ks=3 and iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
752
// Sweeps n over 9..15 and k over 1..20 (step 5) at m=1 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
769
// Sweeps n over 16 and 24 and k over 1..20 (step 5) at m=1 with ks set to 3.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
786
// Sweeps k over 1..20 (step 5), n over 1..8 and m over 1..1 with cm_stride=11 and iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(4)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}
806
// Sweeps k over 1..20 (step 5) at m=1, n=8 with ks=3 and a_offset=23.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, a_offset) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(4)
        .m(1).n(8).k(cur_k)
        .ks(3).a_offset(23)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
  }
}
822
// Sweeps k over 1..20 (step 5) and zero_index over 0..0 at m=1, n=8 with ks=3 and a_offset=23.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, zero) {
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(4)
          .m(1).n(8).k(cur_k)
          .ks(3).a_offset(23).zero_index(cur_mz)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
    }
  }
}
841
// Single run at m=1, n=8, k=4 (sr=4) with cm_stride overridden to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(4)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmsimd);
}
854 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
855
856
857 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=1 with no stride, ks or offset overrides.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
869
// Single run at m=3, n=8, k=1 with cn_stride overridden to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
882
// Sweeps n over 1..8 (outer) and m over 1..3 (inner) at k=1, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
899
// Sweeps m over 1..3 at n=8, k=1, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
914
// Sweeps n over 1..8 at m=3, k=1, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
929
// Sweeps k over 2..9 at m=3, n=8.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
943
// Sweeps k over 2..9, n over 1..8 and m over 1..3, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
962
// Sweeps n over 9..15 and k over 1, 3, 5 (step 2) at m=3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
978
// Sweeps n over 9..15 and k over 1, 3, 5 at m=3, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
995
// Sweeps n over 9..15, k over 1, 3, 5 and m over 1..3, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
1014
// Sweeps n over 16 and 24 (step 8) and k over 1, 3, 5 (step 2) at m=3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
1030
// Sweeps n over 16 and 24 and k over 1, 3, 5 at m=3, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
1047
// Sweeps n over 16 and 24, k over 1, 3, 5 and m over 1..3, with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
1066
// Sweeps k over 1, 3, 5 at m=3, n=8 with ks set to 3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(cur_k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
1081
// Sweeps k over 1, 3, 5, n over 1..8 and m over 1..3 with ks=3 and iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
1101
// Sweeps n over 9..15 and k over 1, 3, 5 at m=3 with ks set to 3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
1118
// Sweeps n over 16 and 24 and k over 1, 3, 5 at m=3 with ks set to 3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
1135
// Sweeps k over 1, 3, 5, n over 1..8 and m over 1..3 with cm_stride=11 and iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
1155
// Sweeps k over 1, 3, 5 at m=3, n=8 with ks=3 and a_offset=17.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(cur_k)
        .ks(3).a_offset(17)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
  }
}
1171
// Sweeps k over 1, 3, 5 and zero_index over 0..2 at m=3, n=8 with ks=3 and a_offset=17.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(cur_k)
          .ks(3).a_offset(17).zero_index(cur_mz)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
1190
// Single run at m=3, n=8, k=1 with cm_stride overridden to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_loadsplat);
}
1203 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1204
1205
1206 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=4, n=8, k=1 with no stride, ks or offset overrides.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
}
1218
// Single run at m=4, n=8, k=1 with cn_stride overridden to 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
}
1231
// Sweeps n over 1..8 (outer) and m over 1..4 (inner) at k=1, with iterations(1).
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1248
// Sweeps m over 1..4 at n=8, k=1, with iterations(1).
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1263
// Sweeps n over 1..8 at m=4, k=1, with iterations(1).
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cur_n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1278
// Sweeps k over 2..9 at m=4, n=8.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(cur_k)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1292
// Sweeps k over 2..9, n over 1..8 and m over 1..4, with iterations(1).
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1311
// Sweeps n over 9..15 and k over 1, 3, 5 (step 2) at m=4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1327
// Sweeps n over 9..15 and k over 1, 3, 5 at m=4, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1344
// Sweeps n over 9..15, k over 1, 3, 5 and m over 1..4, with iterations(1).
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1363
// n in {16, 24} and k in {1, 3, 5} with m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1379
// n in {16, 24} and k in {1, 3, 5} with m = 4 and cn_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1396
// n in {16, 24}, k in {1, 3, 5} and m over 1..4, with a single iteration
// per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1415
// Full 4x8 tile with ks = 3 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1430
// ks = 3 with k in {1, 3, 5}, n over 1..8 and m over 1..4, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1450
// ks = 3 with n swept over 9..15 and k over {1, 3, 5}, m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1467
// ks = 3 with n in {16, 24} and k in {1, 3, 5}, m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1484
// cm_stride = 11 with k in {1, 3, 5}, n over 1..8 and m over 1..4, with a
// single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
1504
// Full 4x8 tile with ks = 3, a_offset = 23 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
  }
}
1520
// Full 4x8 tile with ks = 3 and a_offset = 23; for each k in {1, 3, 5} the
// zero_index is swept over 0..3.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t mz_index = 0; mz_index <= 3; ++mz_index) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(23)
        .zero_index(mz_index)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
1539
// Full 4x8 tile with k = 1 and cm_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_loadsplat);
}
1552 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1553
1554
1555 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x8 tile (m = 4, n = 8) with k = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
}
1567
// Full 4x8 tile with k = 4 and cn_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
}
1580
// k = 4 with n over 1..8 and m over 1..4, with a single iteration per
// configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    for (uint32_t m_size = 1; m_size < 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1597
// k = 4 and n = 8 with m over 1..4, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size < 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1612
// k = 4 and m = 4 with n over 1..8, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1627
// Full 4x8 tile with k swept over 1..3.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1641
// For each k in 1..3, runs every n in 1..8 and m in 1..4 with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1660
// Full 4x8 tile with k swept over 5..7.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1674
// For each k in 5..7, runs every n in 1..8 and m in 1..4 with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1693
// Full 4x8 tile with k stepped by 4 from 8 through 40.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1707
// For each k stepped by 4 from 8 through 40, runs every n in 1..8 and m in
// 1..4 with a single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1726
// n swept over 9..15 and k over {1, 6, 11, 16} with m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1742
// n swept over 9..15 and k over {1, 6, 11, 16} with m = 4 and cn_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1759
// n swept over 9..15, k over {1, 6, 11, 16} and m over 1..4, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1778
// n in {16, 24} and k in {1, 6, 11, 16} with m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1794
// n in {16, 24} and k in {1, 6, 11, 16} with m = 4 and cn_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1811
// n in {16, 24}, k in {1, 6, 11, 16} and m over 1..4, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1830
// Full 4x8 tile with ks = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1845
// ks = 3 with k in {1, 6, 11, 16}, n over 1..8 and m over 1..4, with a
// single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1865
// ks = 3 with n swept over 9..15 and k over {1, 6, 11, 16}, m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1882
// ks = 3 with n in {16, 24} and k in {1, 6, 11, 16}, m = 4.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1899
// cm_stride = 11 with k in {1, 6, 11, 16}, n over 1..8 and m over 1..4, with
// a single iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1919
// Full 4x8 tile with ks = 3, a_offset = 83 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
1935
// Full 4x8 tile with ks = 3 and a_offset = 83; for each k in {1, 6, 11, 16}
// the zero_index is swept over 0..3.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, zero) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    for (uint32_t mz_index = 0; mz_index <= 3; ++mz_index) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(mz_index)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1954
// Full 4x8 tile with k = 4 and cm_stride = 11.
TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
}
1967 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1968
1969
1970 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile (m = 5, n = 8) with k = 1.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
}
1982
// Full 5x8 tile with k = 1 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
}
1995
// k = 1 with n over 1..8 and m over 1..5, with a single iteration per
// configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    for (uint32_t m_size = 1; m_size < 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2012
// k = 1 and n = 8 with m over 1..5, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size < 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2027
// k = 1 and m = 5 with n over 1..8, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_size).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2042
// Full 5x8 tile with k swept over 2..9.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2056
// For each k in 2..9, runs every n in 1..8 and m in 1..5 with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size <= 9; ++k_size) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2075
// n swept over 9..15 and k over {1, 3, 5} with m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2091
// n swept over 9..15 and k over {1, 3, 5} with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2108
// n swept over 9..15, k over {1, 3, 5} and m over 1..5, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2127
// n in {16, 24} and k in {1, 3, 5} with m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2143
// n in {16, 24} and k in {1, 3, 5} with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2160
// n in {16, 24}, k in {1, 3, 5} and m over 1..5, with a single iteration
// per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2179
// Full 5x8 tile with ks = 3 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2194
// ks = 3 with k in {1, 3, 5}, n over 1..8 and m over 1..5, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2214
// ks = 3 with n swept over 9..15 and k over {1, 3, 5}, m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2231
// ks = 3 with n in {16, 24} and k in {1, 3, 5}, m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 6; k_size += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2248
// cm_stride = 11 with k in {1, 3, 5}, n over 1..8 and m over 1..5, with a
// single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2268
// Full 5x8 tile with ks = 3, a_offset = 29 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .a_offset(29)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2284
// Full 5x8 tile with ks = 3 and a_offset = 29; for each k in {1, 3, 5} the
// zero_index is swept over 0..4.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t k_size = 1; k_size < 6; k_size += 2) {
    for (uint32_t mz_index = 0; mz_index <= 4; ++mz_index) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_size)
        .ks(3)
        .a_offset(29)
        .zero_index(mz_index)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2303
// Full 5x8 tile with k = 1 and cm_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_loadsplat);
}
2316 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2317
2318
2319 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 5x8 tile (m = 5, n = 8) with k = 4.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
}
2331
// Full 5x8 tile with k = 4 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
}
2344
// k = 4 with n over 1..8 and m over 1..5, with a single iteration per
// configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    for (uint32_t m_size = 1; m_size < 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2361
// k = 4 and n = 8 with m over 1..5, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size < 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2376
// k = 4 and m = 5 with n over 1..8, with a single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size < 9; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2391
// Full 5x8 tile with k swept over 1..3.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2405
// For each k in 1..3, runs every n in 1..8 and m in 1..5 with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size <= 3; ++k_size) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2424
// Full 5x8 tile with k swept over 5..7.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2438
// For each k in 5..7, runs every n in 1..8 and m in 1..5 with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2457
// Full 5x8 tile with k stepped by 4 from 8 through 40.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2471
// For each k stepped by 4 from 8 through 40, runs every n in 1..8 and m in
// 1..5 with a single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size < 41; k_size += 4) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2490
// n swept over 9..15 and k over {1, 6, 11, 16} with m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2506
// n swept over 9..15 and k over {1, 6, 11, 16} with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2523
// n swept over 9..15, k over {1, 6, 11, 16} and m over 1..5, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2542
// n in {16, 24} and k in {1, 6, 11, 16} with m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2558
// n in {16, 24} and k in {1, 6, 11, 16} with m = 5 and cn_stride = 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2575
// n in {16, 24}, k in {1, 6, 11, 16} and m over 1..5, with a single
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size < 25; n_size += 8) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2594
// Full 5x8 tile with ks = 3 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2609
// ks = 3 with k in {1, 6, 11, 16}, n over 1..8 and m over 1..5, with a
// single iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size < 21; k_size += 5) {
    for (uint32_t n_size = 1; n_size < 9; ++n_size) {
      for (uint32_t m_size = 1; m_size < 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2629
// ks = 3 with n swept over 9..15 and k over {1, 6, 11, 16}, m = 5.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size < 21; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2646
// ks = 3; m = 5; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2663
// cm_stride set to 11; k in {1, 6, 11, 16}; every (m, n) with m in 1..5 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
2683
// Full 5x8 tile with ks = 3 and a_offset = 103; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
2699
// Full 5x8 tile with ks = 3 and a_offset = 103; zero_index swept over 0..4; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 5; mz++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(103)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2718
// Full 5x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
}
2731 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2732
2733
2734 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 6x8 tile (m == mr, n == nr) with k = 4.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
}
2746
// Full 6x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
}
2759
// k = 4; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
2776
// k = 4, n = 8; m swept over 1..6; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
2791
// k = 4, m = 6; n swept over 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
2806
// Full 6x8 tile with k = 1, 2, 3.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
2820
// k in 1..3; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2839
// Full 6x8 tile with k = 5, 6, 7.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
2853
// k in 5..7; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2872
// Full 6x8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
2886
// k = 8, 12, ..., 40; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2905
// m = 6; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
2921
// m = 6; n swept over 9..15; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
2938
// n swept over 9..15; k in {1, 6, 11, 16}; m swept over 1..6; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2957
// m = 6; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
2973
// m = 6; n in {16, 24}; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
2990
// n in {16, 24}; k in {1, 6, 11, 16}; m swept over 1..6; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
3009
// Full 6x8 tile with ks = 3; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
3024
// ks = 3; k in {1, 6, 11, 16}; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
3044
// ks = 3; m = 6; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
3061
// ks = 3; m = 6; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
3078
// cm_stride set to 11; k in {1, 6, 11, 16}; every (m, n) with m in 1..6 and n in 1..8; iterations(1).
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
3098
// Full 6x8 tile with ks = 3 and a_offset = 127; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
  }
}
3114
// Full 6x8 tile with ks = 3 and a_offset = 127; zero_index swept over 0..5; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
    }
  }
}
3133
// Full 6x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmsimd);
}
3146 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3147
3148
3149 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 1x8 tile (m == mr, n == nr) with k = 4.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3161
// Full 1x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3174
// k = 4; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3191
// k = 4, n = 8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3206
// k = 4, m = 1; n swept over 1..8; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3221
// Full 1x8 tile with k = 1, 2, 3.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3235
// k in 1..3; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3254
// Full 1x8 tile with k = 5, 6, 7.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3268
// k in 5..7; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3287
// Full 1x8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3301
// k = 8, 12, ..., 40; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3320
// m = 1; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3336
// m = 1; n swept over 9..15; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3353
// n swept over 9..15; k in {1, 6, 11, 16}; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3372
// m = 1; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3388
// m = 1; n in {16, 24}; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3405
// n in {16, 24}; k in {1, 6, 11, 16}; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3424
// Full 1x8 tile with ks = 3; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3439
// ks = 3; k in {1, 6, 11, 16}; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3459
// ks = 3; m = 1; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3476
// ks = 3; m = 1; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3493
// cm_stride set to 11; k in {1, 6, 11, 16}; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
3513
// Full 1x8 tile with ks = 3 and a_offset = 23; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
  }
}
3529
// Full 1x8 tile with ks = 3 and a_offset = 23; the mz loop covers only zero_index = 0; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
3548
// Full 1x8 tile, k = 4, with cm_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_splat);
}
3561 #endif // XNN_ARCH_WASMRELAXEDSIMD
3562
3563
3564 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 1x8 tile (m == mr, n == nr) with k = 4.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3576
// Full 1x8 tile, k = 4, with cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3589
// k = 4; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3606
// k = 4, n = 8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3621
// k = 4, m = 1; n swept over 1..8; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3636
// Full 1x8 tile with k = 1, 2, 3.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3650
// k in 1..3; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3669
// Full 1x8 tile with k = 5, 6, 7.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3683
// k in 5..7; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3702
// Full 1x8 tile with k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3716
// k = 8, 12, ..., 40; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3735
// m = 1; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3751
// m = 1; n swept over 9..15; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3768
// n swept over 9..15; k in {1, 6, 11, 16}; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3787
// m = 1; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3803
// m = 1; n in {16, 24}; k in {1, 6, 11, 16}; cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3820
// n in {16, 24}; k in {1, 6, 11, 16}; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3839
// Full 1x8 tile with ks = 3; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3854
// ks = 3; k in {1, 6, 11, 16}; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3874
// ks = 3; m = 1; n swept over 9..15; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3891
// ks = 3; m = 1; n in {16, 24}; k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3908
// cm_stride set to 11; k in {1, 6, 11, 16}; n swept over 1..8; the m loop covers only m = 1; iterations(1).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3928
// Full 1x8 tile with ks(3) and a_offset(23), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(4);
    tester.m(1).n(8).k(kc).ks(3).a_offset(23);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
  }
}
3944
// Full 1x8 tile with ks(3), a_offset(23) and zero_index 0, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(4);
      tester.m(1).n(8).k(kc).ks(3).a_offset(23).zero_index(zi);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3963
// Full 1x8 tile, k = 4, with cm_stride(11).
TEST(F32_IGEMM_RELU_1X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(4);
  tester.m(1).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_1x8s4__wasmrelaxedsimd_fma);
}
3976 #endif // XNN_ARCH_WASMRELAXEDSIMD
3977
3978
3979 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 3x8 tile with k = 4.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4);
  tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
3991
// Full 3x8 tile, k = 4, with cn_stride(11).
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
4004
// Every (m, n) sub-tile of the 3x8 tile at k = 4, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4021
// m in 1..3 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4036
// n in 1..8 with m = 3 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4051
// Full 3x8 tile for k = 1, 2, 3.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4065
// Every (m, n) sub-tile of the 3x8 tile for k = 1, 2, 3, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4084
// Full 3x8 tile for k = 5, 6, 7.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4098
// Every (m, n) sub-tile of the 3x8 tile for k = 5, 6, 7, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4117
// Full 3x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4131
// Every (m, n) sub-tile of the 3x8 tile for k = 8, 12, ..., 40, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4150
// m = 3, n in 9..15, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4166
// m = 3, n in 9..15, cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4183
// n in 9..15, m in 1..3, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4202
// m = 3, n = 16 and 24, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4218
// m = 3, n = 16 and 24, cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4235
// n = 16 and 24, m in 1..3, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4254
// Full 3x8 tile with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4269
// Every (m, n) sub-tile of the 3x8 tile with ks(3), one iteration each, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4289
// m = 3, n in 9..15 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4306
// m = 3, n = 16 and 24 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4323
// Every (m, n) sub-tile of the 3x8 tile with cm_stride(11), one iteration each, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4343
// Full 3x8 tile with ks(3) and a_offset(67), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(kc).ks(3).a_offset(67);
    tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
  }
}
4359
// Full 3x8 tile with ks(3), a_offset(67) and zero_index in 0..2, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(kc).ks(3).a_offset(67).zero_index(zi);
      tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4378
// Full 3x8 tile, k = 4, with cm_stride(11).
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_splat);
}
4391 #endif // XNN_ARCH_WASMRELAXEDSIMD
4392
4393
4394 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile with k = 4.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4);
  tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4406
// Full 4x2 tile, k = 4, with cn_stride(5).
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cn_stride(5);
  tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4419
// Every (m, n) sub-tile of the 4x2 tile at k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4436
// m in 1..4 with n = 2 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(mc).n(2).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4451
// n in 1..2 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4466
// Full 4x2 tile for k = 1, 2, 3.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4480
// Every (m, n) sub-tile of the 4x2 tile for k = 1, 2, 3, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4499
// Full 4x2 tile for k = 5, 6, 7.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4513
// Every (m, n) sub-tile of the 4x2 tile for k = 5, 6, 7, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4532
// Full 4x2 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4546
// Every (m, n) sub-tile of the 4x2 tile for k = 8, 12, ..., 40, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4565
// m = 4, n = 3 (the only value in [3, 4)), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4581
// m = 4, n = 3, cn_stride(5), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4598
// n = 3, m in 1..4, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4617
// m = 4, n = 4 and 6, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4633
// m = 4, n = 4 and 6, cn_stride(5), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(5);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4650
// n = 4 and 6, m in 1..4, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4669
// Full 4x2 tile with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4684
// Every (m, n) sub-tile of the 4x2 tile with ks(3), one iteration each, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4704
// m = 4, n = 3 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4721
// m = 4, n = 4 and 6 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4738
// Every (m, n) sub-tile of the 4x2 tile with cm_stride(5), one iteration each, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(5).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4758
// Full 4x2 tile with ks(3) and a_offset(83), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1);
    tester.m(4).n(2).k(kc).ks(3).a_offset(83);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
  }
}
4774
// Full 4x2 tile with ks(3), a_offset(83) and zero_index in 0..3, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1);
      tester.m(4).n(2).k(kc).ks(3).a_offset(83).zero_index(zi);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
    }
  }
}
4793
// Full 4x2 tile, k = 4, with cm_stride(5).
TEST(F32_IGEMM_RELU_4X2C4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1);
  tester.m(4).n(2).k(4).cm_stride(5);
  tester.Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma);
}
4806 #endif // XNN_ARCH_WASMRELAXEDSIMD
4807
4808
4809 #if XNN_ARCH_WASMRELAXEDSIMD
// Full 4x8 tile with k = 4.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4);
  tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4821
// Full 4x8 tile, k = 4, with cn_stride(11).
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
4834
// Every (m, n) sub-tile of the 4x8 tile at k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mc).n(nc).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4851
// m in 1..4 with n = 8 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mc).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4866
// n in 1..8 with m = 4 and k = 4, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nc).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4881
// Full 4x8 tile for k = 1, 2, 3.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4895
// Every (m, n) sub-tile of the 4x8 tile for k = 1, 2, 3, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4914
// Full 4x8 tile for k = 5, 6, 7.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4928
// Every (m, n) sub-tile of the 4x8 tile for k = 5, 6, 7, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4947
// Full 4x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
4961
// Every (m, n) sub-tile of the 4x8 tile for k = 8, 12, ..., 40, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
4980
// m = 4, n in 9..15, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
4996
// m = 4, n in 9..15, cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5013
// n in 9..15, m in 1..4, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5032
// m = 4, n = 16 and 24, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5048
// m = 4, n = 16 and 24, cn_stride(11), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5065
// n = 16 and 24, m in 1..4, for k = 1, 6, 11, 16, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5084
// Full 4x8 tile with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kc).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5099
// Every (m, n) sub-tile of the 4x8 tile with ks(3), one iteration each, for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5119
// m = 4, n in 9..15 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5136
// m = 4, n = 16 and 24 with ks(3), for k = 1, 6, 11, 16.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nc).k(kc).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5153
// cm_stride=11 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..4,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5173
// Full tile (m=4, n=8) with ks=3 and a_offset=83, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
  }
}
5189
// Full tile with ks=3 and a_offset=83; zero_index takes each value 0..3
// for every k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5208
// Single run on the full tile (m=4, n=8, k=4) with cm_stride=11.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_splat);
}
5221 #endif // XNN_ARCH_WASMRELAXEDSIMD
5222
5223
5224 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile (m=5, n=8) with k=4.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5236
// Single run on the full tile (m=5, n=8, k=4) with cn_stride=11.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5249
// k=4 with n over 1..8 and m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5266
// k=4 and n=8 with m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5281
// k=4 and m=5 with n over 1..8; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5296
// Full tile (m=5, n=8) for k over 1..3.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5310
// k over 1..3, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5329
// Full tile (m=5, n=8) for k over 5..7.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5343
// k over 5..7, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5362
// Full tile (m=5, n=8) for k over 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5376
// k over 8, 12, ..., 40, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5395
// m=5 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5411
// cn_stride=11 and m=5 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5428
// n over 9..15, k over {1, 6, 11, 16}, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5447
// m=5 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5463
// cn_stride=11 and m=5 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5480
// n over {16, 24}, k over {1, 6, 11, 16}, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5499
// Full tile (m=5, n=8) with ks=3, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5514
// ks=3 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..5,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5534
// ks=3 with m=5: n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5551
// ks=3 with m=5: n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5568
// cm_stride=11 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..5,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
5588
// Full tile (m=5, n=8) with ks=3 and a_offset=103, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_size)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
  }
}
5604
// Full tile with ks=3 and a_offset=103; zero_index takes each value 0..4
// for every k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_size)
        .ks(3)
        .a_offset(103)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
5623
// Single run on the full tile (m=5, n=8, k=4) with cm_stride=11.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_splat);
}
5636 #endif // XNN_ARCH_WASMRELAXEDSIMD
5637
5638
5639 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile (m=5, n=8) with k=4; sr is 4 for this kernel.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5651
// Single run on the full tile (m=5, n=8, k=4) with cn_stride=11.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
5664
// k=4 with n over 1..8 and m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5681
// k=4 and n=8 with m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5696
// k=4 and m=5 with n over 1..8; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5711
// Full tile (m=5, n=8) for k over 1..3.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5725
// k over 1..3, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5744
// Full tile (m=5, n=8) for k over 5..7.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5758
// k over 5..7, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5777
// Full tile (m=5, n=8) for k over 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5791
// k over 8, 12, ..., 40, n over 1..8, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5810
// m=5 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5826
// cn_stride=11 and m=5 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5843
// n over 9..15, k over {1, 6, 11, 16}, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5862
// m=5 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5878
// cn_stride=11 and m=5 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5895
// n over {16, 24}, k over {1, 6, 11, 16}, m over 1..5; iterations is set to 1.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5914
// Full tile (m=5, n=8) with ks=3, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
5929
// ks=3 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..5,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5949
// ks=3 with m=5: n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5966
// ks=3 with m=5: n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5983
// cm_stride=11 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..5,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 5; ++m_size) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6003
// Full tile (m=5, n=8) with ks=3 and a_offset=103, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_size)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
  }
}
6019
// Full tile with ks=3 and a_offset=103; zero_index takes each value 0..4
// for every k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(k_size)
        .ks(3)
        .a_offset(103)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6038
// Single run on the full tile (m=5, n=8, k=4) with cm_stride=11.
TEST(F32_IGEMM_RELU_5X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma);
}
6051 #endif // XNN_ARCH_WASMRELAXEDSIMD
6052
6053
6054 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile (m=6, n=8) with k=4.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6066
// Single run on the full tile (m=6, n=8, k=4) with cn_stride=11.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6079
// k=4 with n over 1..8 and m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6096
// k=4 and n=8 with m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6111
// k=4 and m=6 with n over 1..8; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_size).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6126
// Full tile (m=6, n=8) for k over 1..3.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6140
// k over 1..3, n over 1..8, m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_lt_4_subtile) {
  for (size_t k_size = 1; k_size < 4; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6159
// Full tile (m=6, n=8) for k over 5..7.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6173
// k over 5..7, n over 1..8, m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_gt_4_subtile) {
  for (size_t k_size = 5; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6192
// Full tile (m=6, n=8) for k over 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6206
// k over 8, 12, ..., 40, n over 1..8, m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, k_div_4_subtile) {
  for (size_t k_size = 8; k_size <= 40; k_size += 4) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6225
// m=6 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6241
// cn_stride=11 and m=6 with n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6258
// n over 9..15, k over {1, 6, 11, 16}, m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6277
// m=6 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6293
// cn_stride=11 and m=6 with n over {16, 24} and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6310
// n over {16, 24}, k over {1, 6, 11, 16}, m over 1..6; iterations is set to 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_subtile) {
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6329
// Full tile (m=6, n=8) with ks=3, for k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6344
// ks=3 sweep: k over {1, 6, 11, 16}, n over 1..8, m over 1..6,
// with iterations set to 1 for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 20; k_size += 5) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6364
// ks=3 with m=6: n over 9..15 and k over {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 20; k_size += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6381
// n = 16, 24 and k = 1, 6, 11, 16 with m = 6 and ks = 3.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6398
// k = 1, 6, 11, 16; n = 1..8; m = 1..6; cm_stride = 11; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
      }
    }
  }
}
6418
// k = 1, 6, 11, 16 on the full 6x8 shape with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, a_offset) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(k_dim).ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
  }
}
6434
// k = 1, 6, 11, 16 and zero_index = 0..5 with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, zero) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(k_dim).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
    }
  }
}
6453
// Single 6x8 run at k = 4 with cm_stride = 11.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_SPLAT, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_splat);
}
6466 #endif // XNN_ARCH_WASMRELAXEDSIMD
6467
6468
6469 #if XNN_ARCH_WASMRELAXEDSIMD
// Single 6x8 run at k = 4.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4);
  tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6481
// Single 6x8 run at k = 4 with cn_stride = 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cn_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6494
// n = 1..8 and m = 1..6 at k = 4; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(m_dim).n(n_dim).k(4).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6511
// m = 1..6 with n = 8 and k = 4; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(m_dim).n(8).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6526
// n = 1..8 with m = 6 and k = 4; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(n_dim).k(4).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6541
// k = 1, 2, 3 on the full 6x8 shape.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6555
// k = 1, 2, 3; n = 1..8; m = 1..6; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6574
// k = 5, 6, 7 on the full 6x8 shape.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6588
// k = 5, 6, 7; n = 1..8; m = 1..6; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6607
// k = 8, 12, ..., 40 (step 4) on the full 6x8 shape.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_dim);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6621
// k = 8, 12, ..., 40; n = 1..8; m = 1..6; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6640
// n = 9..15 and k = 1, 6, 11, 16 with m = 6.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6656
// n = 9..15 and k = 1, 6, 11, 16 with m = 6 and cn_stride = 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6673
// n = 9..15; k = 1, 6, 11, 16; m = 1..6; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6692
// n = 16, 24 and k = 1, 6, 11, 16 with m = 6.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6708
// n = 16, 24 and k = 1, 6, 11, 16 with m = 6 and cn_stride = 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6725
// n = 16, 24; k = 1, 6, 11, 16; m = 1..6; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6744
// k = 1, 6, 11, 16 on the full 6x8 shape with ks = 3.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_dim).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6759
// k = 1, 6, 11, 16; n = 1..8; m = 1..6; ks = 3; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6779
// n = 9..15 and k = 1, 6, 11, 16 with m = 6 and ks = 3.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6796
// n = 16, 24 and k = 1, 6, 11, 16 with m = 6 and ks = 3.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6813
// k = 1, 6, 11, 16; n = 1..8; m = 1..6; cm_stride = 11; a single iteration per configuration.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(4);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
6833
// k = 1, 6, 11, 16 on the full 6x8 shape with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(4);
    tester.m(6).n(8).k(k_dim).ks(3).a_offset(127);
    tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
  }
}
6849
// k = 1, 6, 11, 16 and zero_index = 0..5 with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(4);
      tester.m(6).n(8).k(k_dim).ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
    }
  }
}
6868
// Single 6x8 run at k = 4 with cm_stride = 11.
TEST(F32_IGEMM_RELU_6X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(4);
  tester.m(6).n(8).k(4).cm_stride(11);
  tester.Test(xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma);
}
6881 #endif // XNN_ARCH_WASMRELAXEDSIMD
6882
6883
6884 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 1x4 run at k = 1.
TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
}
6896
// Single 1x4 run at k = 1 with cn_stride = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
}
6909
// n = 1..4 and m = 1 at k = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
6926
// m loop covers only m = 1, with n = 4 and k = 1; a single iteration.
TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(m_dim).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
  }
}
6941
// n = 1..4 with m = 1 and k = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
  }
}
6956
// k = 2..9 on the full 1x4 shape.
TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_dim);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
  }
}
6970
// k = 2..9; n = 1..4; m = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
6989
// n = 5, 6, 7 and k = 1, 3, 5 with m = 1.
TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7005
// n = 5, 6, 7 and k = 1, 3, 5 with m = 1 and cn_stride = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7022
// n = 5, 6, 7; k = 1, 3, 5; m = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
7041
// n = 8, 12 and k = 1, 3, 5 with m = 1.
TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7057
// n = 8, 12 and k = 1, 3, 5 with m = 1 and cn_stride = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7074
// n = 8, 12; k = 1, 3, 5; m = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
7093
// k = 1, 3, 5 on the full 1x4 shape with ks = 3.
TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_dim).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
  }
}
7108
// k = 1, 3, 5; n = 1..4; m = 1; ks = 3; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
7128
// n = 5, 6, 7 and k = 1, 3, 5 with m = 1 and ks = 3.
TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7145
// n = 8, 12 and k = 1, 3, 5 with m = 1 and ks = 3.
TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7162
// k = 1, 3, 5; n = 1..4; m = 1; cm_stride = 7; a single iteration per configuration.
TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
7182
// k = 1, 3, 5 on the full 1x4 shape with ks = 3 and a_offset = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_dim).ks(3).a_offset(7);
    tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
  }
}
7198
// k = 1, 3, 5 and zero_index = 0 with ks = 3 and a_offset = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(4).k(k_dim).ks(3).a_offset(7).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
    }
  }
}
7217
// Single 1x4 run at k = 1 with cm_stride = 7.
TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).cm_stride(7);
  tester.Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
}
7230 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
7231
7232
// Single 2x4 run at k = 1.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
}
7244
// Single 2x4 run at k = 1 with cn_stride = 7.
TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
}
7257
// n = 1..4 and m = 1..2 at k = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7274
// m = 1..2 with n = 4 and k = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(m_dim).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
  }
}
7289
// n = 1..4 with m = 2 and k = 1; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
  }
}
7304
// k = 2..9 on the full 2x4 shape.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_dim);
    tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
  }
}
7318
// k = 2..9; n = 1..4; m = 1..2; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7337
// n = 5, 6, 7 and k = 1, 3, 5 with m = 2.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7353
// n = 5, 6, 7 and k = 1, 3, 5 with m = 2 and cn_stride = 7.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7370
// n = 5, 6, 7; k = 1, 3, 5; m = 1..2; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7389
// n = 8, 12 and k = 1, 3, 5 with m = 2.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7405
// n = 8, 12 and k = 1, 3, 5 with m = 2 and cn_stride = 7.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7422
// n = 8, 12; k = 1, 3, 5; m = 1..2; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7441
// k = 1, 3, 5 on the full 2x4 shape with ks = 3.
TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_dim).ks(3);
    tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
  }
}
7456
// k = 1, 3, 5; n = 1..4; m = 1..2; ks = 3; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7476
// n = 5, 6, 7 and k = 1, 3, 5 with m = 2 and ks = 3.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7493
// n = 8, 12 and k = 1, 3, 5 with m = 2 and ks = 3.
TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).ks(3);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7510
// k = 1, 3, 5; n = 1..4; m = 1..2; cm_stride = 7; a single iteration per configuration.
TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
7530
// k = 1, 3, 5 on the full 2x4 shape with ks = 3 and a_offset = 13.
TEST(F32_IGEMM_RELU_2X4__SCALAR, a_offset) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_dim).ks(3).a_offset(13);
    tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
  }
}
7546
// k = 1, 3, 5 and zero_index = 0, 1 with ks = 3 and a_offset = 13.
TEST(F32_IGEMM_RELU_2X4__SCALAR, zero) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(4).k(k_dim).ks(3).a_offset(13).zero_index(zero_idx);
      tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
    }
  }
}
7565
// Single 2x4 run at k = 1 with cm_stride = 7.
TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cm_stride(7);
  tester.Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
}
7578