// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-relu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 1x8 loadsplat kernel once with m=1, n=8, k=1.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
}
39
// Runs m=1, n=8, k=1 with the tester's cn_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
}
52
// Sweeps n in [1, 8] and m in [1, 1] at k=1, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
69
// Sweeps m in [1, 1] with n=8 and k=1, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
}
84
// Sweeps n in [1, 8] with m=1 and k=1, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
}
99
// Sweeps k in [2, 10) with m=1, n=8.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
}
113
// Sweeps k in [2, 10), n in [1, 8] and m in [1, 1], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
132
// Sweeps n in [9, 16) and k in {1, 3, 5} with m=1.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
148
// Sweeps n in [9, 16) and k in {1, 3, 5} with m=1 and cn_stride(11).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
165
// Sweeps n in [9, 16), k in {1, 3, 5} and m in [1, 1], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
184
// Sweeps n in {16, 24} and k in {1, 3, 5} with m=1.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
200
// Sweeps n in {16, 24} and k in {1, 3, 5} with m=1 and cn_stride(11).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
217
// Sweeps n in {16, 24}, k in {1, 3, 5} and m in [1, 1], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
236
// Sweeps k in {1, 3, 5} with m=1, n=8 and ks(3).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
}
251
// Sweeps k in {1, 3, 5}, n in [1, 8] and m in [1, 1] with ks(3) and iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
271
// Sweeps n in [9, 16) and k in {1, 3, 5} with m=1 and ks(3).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
288
// Sweeps n in {16, 24} and k in {1, 3, 5} with m=1 and ks(3).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
305
// Sweeps k in {1, 3, 5}, n in [1, 8] and m in [1, 1] with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}
325
// Sweeps k in {1, 3, 5} with m=1, n=8, ks(3) and a_offset(7).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
  }
}
341
// Sweeps k in {1, 3, 5} and zero_index mz in [0, 1) with ks(3) and a_offset(7).
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}
360
// Runs m=1, n=8, k=1 with the tester's cm_stride set to 11.
TEST(F32_IGEMM_RELU_1X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_loadsplat);
}
373 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
374
375
376 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x8 splat kernel once with m=3, n=8, k=4.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
}
388
// Runs m=3, n=8, k=4 with the tester's cn_stride set to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
}
401
// Sweeps n in [1, 8] and m in [1, 3] at k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
418
// Sweeps m in [1, 3] with n=8 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
433
// Sweeps n in [1, 8] with m=3 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
448
// Sweeps k in [1, 4) with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
462
// Sweeps k in [1, 4), n in [1, 8] and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
481
// Sweeps k in [5, 8) with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
495
// Sweeps k in [5, 8), n in [1, 8] and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
514
// Sweeps k from 8 to 40 in steps of 4 with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
528
// Sweeps k from 8 to 40 in steps of 4, n in [1, 8] and m in [1, 3], with iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
547
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
563
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
580
// Sweeps n in [9, 16), k in {1, 6, 11, 16} and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
599
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
615
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
632
// Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
651
// Sweeps k in {1, 6, 11, 16} with m=3, n=8 and ks(3).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
666
// Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with ks(3) and iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
686
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3 and ks(3).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
703
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3 and ks(3).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
720
// Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
740
// Sweeps k in {1, 6, 11, 16} with m=3, n=8, ks(3) and a_offset(67).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
  }
}
756
// Sweeps k in {1, 6, 11, 16} and zero_index mz in [0, 3) with ks(3) and a_offset(67).
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(67)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
    }
  }
}
775
// Runs m=3, n=8, k=4 with the tester's cm_stride set to 11.
TEST(F32_IGEMM_RELU_3X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmsimd_splat);
}
788 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
789
790
791 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x8s4 kernel once with m=3, n=8, k=4.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
}
803
// Runs m=3, n=8, k=4 with the tester's cn_stride set to 11.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
}
816
// Sweeps n in [1, 8] and m in [1, 3] at k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
833
// Sweeps m in [1, 3] with n=8 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(m).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
848
// Sweeps n in [1, 8] with m=3 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
863
// Sweeps k in [1, 4) with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
877
// Sweeps k in [1, 4), n in [1, 8] and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
896
// Sweeps k in [5, 8) with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
910
// Sweeps k in [5, 8), n in [1, 8] and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
929
// Sweeps k from 8 to 40 in steps of 4 with m=3, n=8.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
943
// Sweeps k from 8 to 40 in steps of 4, n in [1, 8] and m in [1, 3], with iterations(1).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
962
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
978
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
995
// Sweeps n in [9, 16), k in {1, 6, 11, 16} and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1014
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1030
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3 and cn_stride(11).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1047
// Sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1066
// Sweeps k in {1, 6, 11, 16} with m=3, n=8 and ks(3).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
1081
// Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with ks(3) and iterations(1).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1101
// Sweeps n in [9, 16) and k in {1, 6, 11, 16} with m=3 and ks(3).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1118
// Sweeps n in {16, 24} and k in {1, 6, 11, 16} with m=3 and ks(3).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1135
// Sweeps k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with cm_stride(11) and iterations(1).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1155
// Sweeps k in {1, 6, 11, 16} with m=3, n=8, ks(3) and a_offset(67).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(k)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
  }
}
1171
// Sweeps k in {1, 6, 11, 16} and zero_index mz in [0, 3) with ks(3) and a_offset(67).
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(67)
        .zero_index(mz)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
    }
  }
}
1190
// Runs m=3, n=8, k=4 with the tester's cm_stride set to 11.
TEST(F32_IGEMM_RELU_3X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmsimd);
}
1203 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1204
1205
1206 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x2c4 kernel once with m=4, n=2, k=4.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
}
1218
// Runs m=4, n=2, k=4 with the tester's cn_stride set to 5.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
}
1231
// Sweeps n in [1, 2] and m in [1, 4] at k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(m).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1248
// Sweeps m in [1, 4] with n=2 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(m).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1263
// Sweeps n in [1, 2] with m=4 and k=4, with iterations(1) per shape.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(n).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1278
// Sweeps k in [1, 4) with m=4, n=2.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1292
// Sweeps k in [1, 4), n in [1, 2] and m in [1, 4], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1311
// Sweeps k in [5, 8) with m=4, n=2.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1325
// Sweeps k in [5, 8), n in [1, 2] and m in [1, 4], with iterations(1) per shape.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1344
// Sweeps k from 8 to 40 in steps of 4 with m=4, n=2.
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1358
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n in 1..2 and m in 1..4; iterations(1) each.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1377
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
  // n=3 (the only value in the range), m=4; k = 1, 6, 11, 16.
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1393
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
  // n=3, m=4, cn_stride(5); k = 1, 6, 11, 16.
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1410
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
  // n=3; k = 1, 6, 11, 16; m in 1..4; iterations(1) per combination.
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1429
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
  // n in {4, 6}, m=4; k = 1, 6, 11, 16.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1445
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
  // n in {4, 6}, m=4, cn_stride(5); k = 1, 6, 11, 16.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1462
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
  // n in {4, 6}; k = 1, 6, 11, 16; m in 1..4; iterations(1) per combination.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1481
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel) {
  // m=4, n=2, ks(3); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1496
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel_subtile) {
  // ks(3); k = 1, 6, 11, 16 crossed with n in 1..2 and m in 1..4; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1516
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_small_kernel) {
  // n=3, m=4, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1533
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_small_kernel) {
  // n in {4, 6}, m=4, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1550
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
  // cm_stride(5); k = 1, 6, 11, 16 crossed with n in 1..2 and m in 1..4; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1570
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, a_offset) {
  // m=4, n=2, ks(3), a_offset(83); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1586
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, zero) {
  // m=4, n=2, ks(3), a_offset(83); k = 1, 6, 11, 16 crossed with zero_index 0..3.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(4).n(2).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1605
TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
  // Single configuration: m=4, n=2, k=4 with cm_stride(5).
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .cm_stride(5)
      .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
}
1618 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1619
1620
1621 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_eq_4) {
  // Single configuration: m=4, n=8, k=4.
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
}
1633
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, strided_cn) {
  // Single configuration: m=4, n=8, k=4 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
}
1646
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile) {
  // k=4; n in 1..8 crossed with m in 1..4; iterations(1) per combination.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1663
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile_m) {
  // n=8, k=4; m in 1..4; iterations(1) per value.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1678
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_eq_4_subtile_n) {
  // m=4, k=4; n in 1..8; iterations(1) per value.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1693
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_lt_4) {
  // m=4, n=8; k takes every value in 1..3.
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1707
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_lt_4_subtile) {
  // k in 1..3 crossed with n in 1..8 and m in 1..4; iterations(1) per combination.
  for (size_t kc = 1; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1726
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_gt_4) {
  // m=4, n=8; k takes every value in 5..7.
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1740
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_gt_4_subtile) {
  // k in 5..7 crossed with n in 1..8 and m in 1..4; iterations(1) per combination.
  for (size_t kc = 5; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1759
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_div_4) {
  // m=4, n=8; k = 8, 12, ..., 40 (step 4).
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1773
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n in 1..8 and m in 1..4; iterations(1) each.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1792
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_gt_8) {
  // n in 9..15, m=4; k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1808
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_gt_8_strided_cn) {
  // n in 9..15, m=4, cn_stride(11); k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1825
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_gt_8_subtile) {
  // n in 9..15; k = 1, 6, 11, 16; m in 1..4; iterations(1) per combination.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1844
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_div_8) {
  // n in {16, 24}, m=4; k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1860
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_div_8_strided_cn) {
  // n in {16, 24}, m=4, cn_stride(11); k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1877
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_div_8_subtile) {
  // n in {16, 24}; k = 1, 6, 11, 16; m in 1..4; iterations(1) per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1896
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, small_kernel) {
  // m=4, n=8, ks(3); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
1911
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, small_kernel_subtile) {
  // ks(3); k = 1, 6, 11, 16 crossed with n in 1..8 and m in 1..4; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1931
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_gt_8_small_kernel) {
  // n in 9..15, m=4, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1948
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, n_div_8_small_kernel) {
  // n in {16, 24}, m=4, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
1965
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, strided_cm_subtile) {
  // cm_stride(11); k = 1, 6, 11, 16 crossed with n in 1..8 and m in 1..4; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
1985
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, a_offset) {
  // m=4, n=8, ks(3), a_offset(83); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
  }
}
2001
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, zero) {
  // m=4, n=8, ks(3), a_offset(83); k = 1, 6, 11, 16 crossed with zero_index 0..3.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
    }
  }
}
2020
TEST(F32_IGEMM_RELU_4X8S4__WASMSIMD, strided_cm) {
  // Single configuration: m=4, n=8, k=4 with cm_stride(11).
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmsimd);
}
2033 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2034
2035
2036 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_eq_4) {
  // Single configuration: m=5, n=8, k=4.
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
}
2048
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, strided_cn) {
  // Single configuration: m=5, n=8, k=4 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
}
2061
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile) {
  // k=4; n in 1..8 crossed with m in 1..5; iterations(1) per combination.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2078
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile_m) {
  // n=8, k=4; m in 1..5; iterations(1) per value.
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(mc).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2093
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_eq_4_subtile_n) {
  // m=5, k=4; n in 1..8; iterations(1) per value.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2108
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_lt_4) {
  // m=5, n=8; k takes every value in 1..3.
  for (size_t kc = 1; kc <= 3; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2122
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_lt_4_subtile) {
  // k in 1..3 crossed with n in 1..8 and m in 1..5; iterations(1) per combination.
  for (size_t kc = 1; kc <= 3; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2141
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_gt_4) {
  // m=5, n=8; k takes every value in 5..7.
  for (size_t kc = 5; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2155
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_gt_4_subtile) {
  // k in 5..7 crossed with n in 1..8 and m in 1..5; iterations(1) per combination.
  for (size_t kc = 5; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2174
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_div_4) {
  // m=5, n=8; k = 8, 12, ..., 40 (step 4).
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2188
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, k_div_4_subtile) {
  // k = 8, 12, ..., 40 crossed with n in 1..8 and m in 1..5; iterations(1) each.
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2207
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_gt_8) {
  // n in 9..15, m=5; k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2223
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_gt_8_strided_cn) {
  // n in 9..15, m=5, cn_stride(11); k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2240
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_gt_8_subtile) {
  // n in 9..15; k = 1, 6, 11, 16; m in 1..5; iterations(1) per combination.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2259
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_div_8) {
  // n in {16, 24}, m=5; k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2275
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_div_8_strided_cn) {
  // n in {16, 24}, m=5, cn_stride(11); k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2292
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_div_8_subtile) {
  // n in {16, 24}; k = 1, 6, 11, 16; m in 1..5; iterations(1) per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2311
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, small_kernel) {
  // m=5, n=8, ks(3); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2326
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, small_kernel_subtile) {
  // ks(3); k = 1, 6, 11, 16 crossed with n in 1..8 and m in 1..5; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2346
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_gt_8_small_kernel) {
  // n in 9..15, m=5, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2363
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, n_div_8_small_kernel) {
  // n in {16, 24}, m=5, ks(3); k = 1, 6, 11, 16.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2380
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, strided_cm_subtile) {
  // cm_stride(11); k = 1, 6, 11, 16 crossed with n in 1..8 and m in 1..5; iterations(1) each.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
            .mr(5).nr(8).kr(1).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
2400
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, a_offset) {
  // m=5, n=8, ks(3), a_offset(103); k = 1, 6, 11, 16.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(103)
        .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
  }
}
2416
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, zero) {
  // m=5, n=8, ks(3), a_offset(103); k = 1, 6, 11, 16 crossed with zero_index 0..4.
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi <= 4; ++zi) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(5).n(8).k(kc)
          .ks(3)
          .a_offset(103)
          .zero_index(zi)
          .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
    }
  }
}
2435
TEST(F32_IGEMM_RELU_5X8S4__WASMSIMD, strided_cm) {
  // Single configuration: m=5, n=8, k=4 with cm_stride(11).
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd);
}
2448 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2449
2450
2451 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  // Single configuration: m=6, n=8, k=1.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
}
2463
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cn) {
  // Single configuration: m=6, n=8, k=1 with cn_stride(11).
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
}
2476
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  // k=1; n in 1..8 crossed with m in 1..6; iterations(1) per combination.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2493
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  // n=8, k=1; m in 1..6; iterations(1) per value.
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2508
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  // m=6, k=1; n in 1..8; iterations(1) per value.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2523
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  // m=6, n=8; k takes every value in 2..9.
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2537
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  // k in 2..9 crossed with n in 1..8 and m in 1..6; iterations(1) per combination.
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2556
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  // n in 9..15, m=6; k = 1, 3, 5.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2572
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  // n in 9..15, m=6, cn_stride(11); k = 1, 3, 5.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2589
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  // n in 9..15; k = 1, 3, 5; m in 1..6; iterations(1) per combination.
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2608
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8) {
  // n in {16, 24}, m=6; k = 1, 3, 5.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2624
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  // n in {16, 24}, m=6, cn_stride(11); k = 1, 3, 5.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2641
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}; k = 1, 3, 5; m in 1..6; iterations(1) per combination.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2660
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, small_kernel) {
  // m=6, n=8, ks(3); k = 1, 3, 5.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2675
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  // ks(3); k = 1, 3, 5 crossed with n in 1..8 and m in 1..6; iterations(1) each.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2695
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  // m = 6; n in 9..15; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2712
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  // m = 6; n in {16, 24}; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2729
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}; n in 1..8; m in 1..6; cm_stride = 11; iterations(1) per configuration.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2749
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, a_offset) {
  // m = 6; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 37.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2765
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, zero) {
  // m = 6; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 37; zero_index swept over 0..5.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k)
          .ks(3)
          .a_offset(37)
          .zero_index(mz)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2784
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_LOADSPLAT, strided_cm) {
  // Single configuration: m = 6, n = 8, k = 1, cm_stride = 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_loadsplat);
}
2797 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2798
2799
2800 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4) {
  // Single configuration: m = 6, n = 8, k = 4.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
}
2812
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cn) {
  // Single configuration: m = 6, n = 8, k = 4, cn_stride = 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
}
2825
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // k = 4; n in 1..8; m in 1..6; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 6; ++m) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m).n(n).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
2842
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // n = 8; k = 4; m in 1..6; iterations(1) per configuration.
  for (uint32_t m = 1; m <= 6; ++m) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
2857
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // m = 6; k = 4; n in 1..8; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
2872
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_lt_4) {
  // m = 6; n = 8; k in 1..3.
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
2886
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // k in 1..3; n in 1..8; m in 1..6; iterations(1) per configuration.
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
2905
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_gt_4) {
  // m = 6; n = 8; k in 5..7.
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
2919
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // k in 5..7; n in 1..8; m in 1..6; iterations(1) per configuration.
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
2938
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_div_4) {
  // m = 6; n = 8; k in {8, 12, ..., 40}.
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
2952
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // k in {8, 12, ..., 40}; n in 1..8; m in 1..6; iterations(1) per configuration.
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
2971
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8) {
  // m = 6; n in 9..15; k in {1, 6, 11, 16}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
2987
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  // m = 6; n in 9..15; k in {1, 6, 11, 16}; cn_stride = 11.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3004
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15; k in {1, 6, 11, 16}; m in 1..6; iterations(1) per configuration.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3023
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8) {
  // m = 6; n in {16, 24}; k in {1, 6, 11, 16}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3039
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // m = 6; n in {16, 24}; k in {1, 6, 11, 16}; cn_stride = 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3056
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // n in {16, 24}; k in {1, 6, 11, 16}; m in 1..6; iterations(1) per configuration.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3075
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, small_kernel) {
  // m = 6; n = 8; k in {1, 6, 11, 16}; ks = 3.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3090
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  // k in {1, 6, 11, 16}; n in 1..8; m in 1..6; ks = 3; iterations(1) per configuration.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3110
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  // m = 6; n in 9..15; k in {1, 6, 11, 16}; ks = 3.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3127
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  // m = 6; n in {16, 24}; k in {1, 6, 11, 16}; ks = 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3144
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // k in {1, 6, 11, 16}; n in 1..8; m in 1..6; cm_stride = 11; iterations(1) per configuration.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 6; ++m) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3164
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, a_offset) {
  // m = 6; n = 8; k in {1, 6, 11, 16}; ks = 3; a_offset = 127.
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .a_offset(127)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
  }
}
3180
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, zero) {
  // m = 6; n = 8; k in {1, 6, 11, 16}; ks = 3; a_offset = 127; zero_index swept over 0..5.
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; ++mz) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k)
          .ks(3)
          .a_offset(127)
          .zero_index(mz)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3199
TEST(F32_IGEMM_RELU_6X8__WASMSIMD_SPLAT, strided_cm) {
  // Single configuration: m = 6, n = 8, k = 4, cm_stride = 11.
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmsimd_splat);
}
3212 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3213
3214
3215 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  // Single configuration: m = 1, n = 8, k = 1.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3227
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  // Single configuration: m = 1, n = 8, k = 1, cn_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3240
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  // k = 1; n in 1..8; m loop covers only m = 1; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3257
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // n = 8; k = 1; m loop covers only m = 1; iterations(1) per configuration.
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3272
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // m = 1; k = 1; n in 1..8; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3287
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // m = 1; n = 8; k in 2..9.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3301
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // k in 2..9; n in 1..8; m loop covers only m = 1; iterations(1) per configuration.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3320
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // m = 1; n in 9..15; k in {1, 3, 5}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3336
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // m = 1; n in 9..15; k in {1, 3, 5}; cn_stride = 11.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3353
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // n in 9..15; k in {1, 3, 5}; m loop covers only m = 1; iterations(1) per configuration.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3372
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // m = 1; n in {16, 24}; k in {1, 3, 5}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3388
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // m = 1; n in {16, 24}; k in {1, 3, 5}; cn_stride = 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3405
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}; k in {1, 3, 5}; m loop covers only m = 1; iterations(1) per configuration.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3424
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  // m = 1; n = 8; k in {1, 3, 5}; ks = 3.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3439
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  // k in {1, 3, 5}; n in 1..8; m loop covers only m = 1; ks = 3; iterations(1) per configuration.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3459
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  // m = 1; n in 9..15; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3476
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  // m = 1; n in {16, 24}; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3493
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}; n in 1..8; m loop covers only m = 1; cm_stride = 11; iterations(1) per configuration.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3513
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  // m = 1; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 7.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3529
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  // m = 1; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 7; zero_index loop covers only 0.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(k)
          .ks(3)
          .a_offset(7)
          .zero_index(mz)
          .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3548
TEST(F32_IGEMM_RELU_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Single configuration: m = 1, n = 8, k = 1, cm_stride = 11.
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3561 #endif // XNN_ARCH_WASMRELAXEDSIMD
3562
3563
3564 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  // Single configuration: m = 3, n = 8, k = 1.
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3576
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  // Single configuration: m = 3, n = 8, k = 1, cn_stride = 11.
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3589
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  // k = 1; n in 1..8; m in 1..3; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m).n(n).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3606
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  // n = 8; k = 1; m in 1..3; iterations(1) per configuration.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3621
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  // m = 3; k = 1; n in 1..8; iterations(1) per configuration.
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3636
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  // m = 3; n = 8; k in 2..9.
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3650
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  // k in 2..9; n in 1..8; m in 1..3; iterations(1) per configuration.
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3669
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  // m = 3; n in 9..15; k in {1, 3, 5}.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3685
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  // m = 3; n in 9..15; k in {1, 3, 5}; cn_stride = 11.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3702
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  // n in 9..15; k in {1, 3, 5}; m in 1..3; iterations(1) per configuration.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3721
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  // m = 3; n in {16, 24}; k in {1, 3, 5}.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3737
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  // m = 3; n in {16, 24}; k in {1, 3, 5}; cn_stride = 11.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3754
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  // n in {16, 24}; k in {1, 3, 5}; m in 1..3; iterations(1) per configuration.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3773
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  // m = 3; n = 8; k in {1, 3, 5}; ks = 3.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3788
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  // k in {1, 3, 5}; n in 1..8; m in 1..3; ks = 3; iterations(1) per configuration.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3808
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  // m = 3; n in 9..15; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3825
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  // m = 3; n in {16, 24}; k in {1, 3, 5}; ks = 3.
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n).k(k)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3842
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  // k in {1, 3, 5}; n in 1..8; m in 1..3; cm_stride = 11; iterations(1) per configuration.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3862
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  // m = 3; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 17.
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k)
        .ks(3)
        .a_offset(17)
        .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3878
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  // m = 3; n = 8; k in {1, 3, 5}; ks = 3; a_offset = 17; zero_index swept over 0..2.
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(k)
          .ks(3)
          .a_offset(17)
          .zero_index(mz)
          .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3897
TEST(F32_IGEMM_RELU_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  // Single configuration: m = 3, n = 8, k = 1, cm_stride = 11.
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3910 #endif // XNN_ARCH_WASMRELAXEDSIMD
3911
3912
3913 #if XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  // Single configuration: m = 3, n = 8, k = 4 (sr = 4).
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
3925
// strided_cn: single run at m=3, n=8, k=4 with cn_stride(11).
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
3938
// k_eq_4_subtile: k fixed at 4; sweeps n in 1..8 and m in 1..3, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3955
// k_eq_4_subtile_m: n=8 and k=4 fixed; sweeps m in 1..3, one iteration each.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3970
// k_eq_4_subtile_n: m=3 and k=4 fixed; sweeps n in 1..8, one iteration each.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3985
// k_lt_4: full 3x8 tile for k in 1..3.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3999
// k_lt_4_subtile: sweeps k in 1..3, n in 1..8 and m in 1..3, one iteration
// per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4018
// k_gt_4: full 3x8 tile for k in 5..7.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4032
// k_gt_4_subtile: sweeps k in 5..7, n in 1..8 and m in 1..3, one iteration
// per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4051
// k_div_4: full 3x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4065
// k_div_4_subtile: sweeps k = 8, 12, ..., 40, n in 1..8 and m in 1..3, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4084
// n_gt_8: m=3; sweeps n in 9..15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4100
// n_gt_8_strided_cn: m=3 with cn_stride(11); sweeps n in 9..15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4117
// n_gt_8_subtile: sweeps n in 9..15, k in {1, 6, 11, 16} and m in 1..3, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4136
// n_div_8: m=3; sweeps n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4152
// n_div_8_strided_cn: m=3 with cn_stride(11); sweeps n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4169
// n_div_8_subtile: sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in 1..3,
// one iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4188
// small_kernel: full 3x8 tile with ks(3), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4203
// small_kernel_subtile: ks(3); sweeps k in {1, 6, 11, 16}, n in 1..8 and
// m in 1..3, one iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4223
// n_gt_8_small_kernel: m=3 with ks(3); sweeps n in 9..15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4240
// n_div_8_small_kernel: m=3 with ks(3); sweeps n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4257
// strided_cm_subtile: cm_stride(11); sweeps k in {1, 6, 11, 16}, n in 1..8
// and m in 1..3, one iteration per configuration.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4277
// a_offset: full 3x8 tile with ks(3) and a_offset(67), for
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(k_val)
        .ks(3)
        .a_offset(67)
        .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4293
// zero: ks(3) and a_offset(67), passing each zero_index in 0..2, for
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 3; ++z_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(3).n(8).k(k_val)
          .ks(3)
          .a_offset(67)
          .zero_index(z_idx)
          .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4312
// strided_cm: single run at m=3, n=8, k=4 with cm_stride(11).
TEST(F32_IGEMM_RELU_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
4325 #endif // XNN_ARCH_WASMRELAXEDSIMD
4326
4327
4328 #if XNN_ARCH_WASMRELAXEDSIMD
// k_eq_1: single run at m=4, n=8, k=1.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4340
// strided_cn: single run at m=4, n=8, k=1 with cn_stride(11).
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4353
// k_eq_1_subtile: k fixed at 1; sweeps n in 1..8 and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4370
// k_eq_1_subtile_m: n=8 and k=1 fixed; sweeps m in 1..4, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4385
// k_eq_1_subtile_n: m=4 and k=1 fixed; sweeps n in 1..8, one iteration each.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4400
// k_gt_1: full 4x8 tile for k in 2..9.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4414
// k_gt_1_subtile: sweeps k in 2..9, n in 1..8 and m in 1..4, one iteration
// per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4433
// n_gt_8: m=4; sweeps n in 9..15 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4449
// n_gt_8_strided_cn: m=4 with cn_stride(11); sweeps n in 9..15 and
// k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4466
// n_gt_8_subtile: sweeps n in 9..15, k in {1, 3, 5} and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4485
// n_div_8: m=4; sweeps n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4501
// n_div_8_strided_cn: m=4 with cn_stride(11); sweeps n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4518
// n_div_8_subtile: sweeps n in {16, 24}, k in {1, 3, 5} and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4537
// small_kernel: full 4x8 tile with ks(3), for k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4552
// small_kernel_subtile: ks(3); sweeps k in {1, 3, 5}, n in 1..8 and
// m in 1..4, one iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4572
// n_gt_8_small_kernel: m=4 with ks(3); sweeps n in 9..15 and k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4589
// n_div_8_small_kernel: m=4 with ks(3); sweeps n in {16, 24} and
// k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4606
// strided_cm_subtile: cm_stride(11); sweeps k in {1, 3, 5}, n in 1..8 and
// m in 1..4, one iteration per configuration.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4626
// a_offset: full 4x8 tile with ks(3) and a_offset(23), for k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(23)
        .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4642
// zero: ks(3) and a_offset(23), passing each zero_index in 0..3, for
// k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(23)
          .zero_index(z_idx)
          .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4661
// strided_cm: single run at m=4, n=8, k=1 with cm_stride(11).
TEST(F32_IGEMM_RELU_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4674 #endif // XNN_ARCH_WASMRELAXEDSIMD
4675
4676
4677 #if XNN_ARCH_WASMRELAXEDSIMD
// k_eq_4: single run at m=4, n=8, k=4.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
4689
// strided_cn: single run at m=4, n=8, k=4 with cn_stride(11).
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
4702
// k_eq_4_subtile: k fixed at 4; sweeps n in 1..8 and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(m_val).n(n_val).k(4)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4719
// k_eq_4_subtile_m: n=8 and k=4 fixed; sweeps m in 1..4, one iteration each.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(m_val).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4734
// k_eq_4_subtile_n: m=4 and k=4 fixed; sweeps n in 1..8, one iteration each.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(n_val).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4749
// k_lt_4: full 4x8 tile for k in 1..3.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4763
// k_lt_4_subtile: sweeps k in 1..3, n in 1..8 and m in 1..4, one iteration
// per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t k_val = 1; k_val < 4; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4782
// k_gt_4: full 4x8 tile for k in 5..7.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4796
// k_gt_4_subtile: sweeps k in 5..7, n in 1..8 and m in 1..4, one iteration
// per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t k_val = 5; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4815
// k_div_4: full 4x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4829
// k_div_4_subtile: sweeps k = 8, 12, ..., 40, n in 1..8 and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t k_val = 8; k_val <= 40; k_val += 4) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4848
// n_gt_8: m=4; sweeps n in 9..15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4864
// n_gt_8_strided_cn: m=4 with cn_stride(11); sweeps n in 9..15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4881
// n_gt_8_subtile: sweeps n in 9..15, k in {1, 6, 11, 16} and m in 1..4, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4900
// n_div_8: m=4; sweeps n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4916
// n_div_8_strided_cn: m=4 with cn_stride(11); sweeps n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4933
// n_div_8_subtile: sweeps n in {16, 24}, k in {1, 6, 11, 16} and m in 1..4,
// one iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4952
// small_kernel: full 4x8 tile with ks(3), for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4967
// small_kernel_subtile: ks(3); sweeps k in {1, 6, 11, 16}, n in 1..8 and
// m in 1..4, one iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4987
// n_gt_8_small_kernel: m=4 with ks(3); sweeps n in 9..15 and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5004
// n_div_8_small_kernel: m=4 with ks(3); sweeps n in {16, 24} and
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 20; k_val += 5) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5021
// strided_cm_subtile: cm_stride(11); sweeps k in {1, 6, 11, 16}, n in 1..8
// and m in 1..4, one iteration per configuration.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5041
// a_offset: full 4x8 tile with ks(3) and a_offset(83), for
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5057
// zero: ks(3) and a_offset(83), passing each zero_index in 0..3, for
// k in {1, 6, 11, 16}.
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t k_val = 1; k_val <= 20; k_val += 5) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(4).n(8).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(z_idx)
          .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5076
// strided_cm: single run at m=4, n=8, k=4 with cm_stride(11).
TEST(F32_IGEMM_RELU_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
5089 #endif // XNN_ARCH_WASMRELAXEDSIMD
5090
5091
5092 #if XNN_ARCH_WASMRELAXEDSIMD
// k_eq_1: single run at m=5, n=8, k=1.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5104
// strided_cn: single run at m=5, n=8, k=1 with cn_stride(11).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5117
// k_eq_1_subtile: k fixed at 1; sweeps n in 1..8 and m in 1..5, one
// iteration per configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
      GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5134
// k_eq_1_subtile_m: n=8 and k=1 fixed; sweeps m in 1..5, one iteration each.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
    GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5149
// Sweeps n over [1, 8] with m fixed at 5 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5164
// Sweeps k over [2, 9] with m == 5 and n == 8.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_val)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5178
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 5] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5197
// Sweeps n over [9, 15] and k over {1, 3, 5} with m fixed at 5.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5213
// Sweeps n over [9, 15] and k over {1, 3, 5} with m == 5 and cn_stride(11).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5230
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 5] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5249
// Runs n in {16, 24} and k in {1, 3, 5} with m fixed at 5.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5265
// Runs n in {16, 24} and k in {1, 3, 5} with m == 5 and cn_stride(11).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5282
// Runs n in {16, 24}, k in {1, 3, 5} and m in [1, 5] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5301
// Runs k in {1, 3, 5} with m == 5, n == 8 and ks(3).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5316
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 5] (outer to inner),
// with ks(3) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5336
// Sweeps n over [9, 15] and k over {1, 3, 5} with m == 5 and ks(3).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5353
// Runs n in {16, 24} and k in {1, 3, 5} with m == 5 and ks(3).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5370
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 5] (outer to inner),
// with cm_stride(11) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 5; ++m_val) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5390
// Runs k in {1, 3, 5} with m == 5, n == 8, ks(3) and a_offset(29).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(k_val)
      .ks(3)
      .a_offset(29)
      .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5406
// Runs k in {1, 3, 5} and zero_index in [0, 4] with m == 5, n == 8,
// ks(3) and a_offset(29).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 5; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(k_val)
        .ks(3)
        .a_offset(29)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5425
// Single run with m == 5, n == 8, k == 1 and cm_stride(11).
TEST(F32_IGEMM_RELU_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5438 #endif // XNN_ARCH_WASMRELAXEDSIMD
5439
5440
5441 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run with m == mr (6), n == nr (8) and k == 1.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5453
// Single run with m == 6, n == 8, k == 1 and cn_stride(11).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5466
// Sweeps n over [1, 8] (outer) and m over [1, 6] (inner) at k == 1,
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5483
// Sweeps m over [1, 6] with n fixed at 8 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5498
// Sweeps n over [1, 8] with m fixed at 6 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5513
// Sweeps k over [2, 9] with m == 6 and n == 8.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5527
// Sweeps k over [2, 9], n over [1, 8] and m over [1, 6] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5546
// Sweeps n over [9, 15] and k over {1, 3, 5} with m fixed at 6.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5562
// Sweeps n over [9, 15] and k over {1, 3, 5} with m == 6 and cn_stride(11).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5579
// Sweeps n over [9, 15], k over {1, 3, 5} and m over [1, 6] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5598
// Runs n in {16, 24} and k in {1, 3, 5} with m fixed at 6.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5614
// Runs n in {16, 24} and k in {1, 3, 5} with m == 6 and cn_stride(11).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5631
// Runs n in {16, 24}, k in {1, 3, 5} and m in [1, 6] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5650
// Runs k in {1, 3, 5} with m == 6, n == 8 and ks(3).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5665
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 6] (outer to inner),
// with ks(3) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5685
// Sweeps n over [9, 15] and k over {1, 3, 5} with m == 6 and ks(3).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5702
// Runs n in {16, 24} and k in {1, 3, 5} with m == 6 and ks(3).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5719
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 6] (outer to inner),
// with cm_stride(11) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5739
// Runs k in {1, 3, 5} with m == 6, n == 8, ks(3) and a_offset(37).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .ks(3)
      .a_offset(37)
      .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5755
// Runs k in {1, 3, 5} and zero_index in [0, 5] with m == 6, n == 8,
// ks(3) and a_offset(37).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_val)
        .ks(3)
        .a_offset(37)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5774
// Single run with m == 6, n == 8, k == 1 and cm_stride(11).
TEST(F32_IGEMM_RELU_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5787 #endif // XNN_ARCH_WASMRELAXEDSIMD
5788
5789
5790 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m == mr (2), n == nr (4) and k == 1.
TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
}
5802
// Single run with m == 2, n == 4, k == 1 and cn_stride(7).
TEST(F32_IGEMM_RELU_2X4__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
}
5815
// Sweeps n over [1, 4] (outer) and m over [1, 2] (inner) at k == 1,
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
5832
// Sweeps m over [1, 2] with n fixed at 4 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_val).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
  }
}
5847
// Sweeps n over [1, 4] with m fixed at 2 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
  }
}
5862
// Sweeps k over [2, 9] with m == 2 and n == 4.
TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
  }
}
5876
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 2] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
5895
// Sweeps n over [5, 7] and k over {1, 3, 5} with m fixed at 2.
TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
5911
// Sweeps n over [5, 7] and k over {1, 3, 5} with m == 2 and cn_stride(7).
TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
5928
// Sweeps n over [5, 7], k over {1, 3, 5} and m over [1, 2] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
5947
// Runs n in {8, 12} and k in {1, 3, 5} with m fixed at 2.
TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
5963
// Runs n in {8, 12} and k in {1, 3, 5} with m == 2 and cn_stride(7).
TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
5980
// Runs n in {8, 12}, k in {1, 3, 5} and m in [1, 2] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
5999
// Runs k in {1, 3, 5} with m == 2, n == 4 and ks(3).
TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
  }
}
6014
// Runs k in {1, 3, 5}, n in [1, 4] and m in [1, 2] (outer to inner),
// with ks(3) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6034
// Sweeps n over [5, 7] and k over {1, 3, 5} with m == 2 and ks(3).
TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
6051
// Runs n in {8, 12} and k in {1, 3, 5} with m == 2 and ks(3).
TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
6068
// Runs k in {1, 3, 5}, n in [1, 4] and m in [1, 2] (outer to inner),
// with cm_stride(7) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
6088
// Runs k in {1, 3, 5} with m == 2, n == 4, ks(3) and a_offset(13).
TEST(F32_IGEMM_RELU_2X4__WASM, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
  }
}
6104
// Runs k in {1, 3, 5} and zero_index in [0, 1] with m == 2, n == 4,
// ks(3) and a_offset(13).
TEST(F32_IGEMM_RELU_2X4__WASM, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
    }
  }
}
6123
// Single run with m == 2, n == 4, k == 1 and cm_stride(7).
TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
}
6136 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6137
6138
6139 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m == mr (4), n == nr (2) and k == 1.
TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
}
6151
// Single run with m == 4, n == 2, k == 1 and cn_stride(5).
TEST(F32_IGEMM_RELU_4X2__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
}
6164
// Sweeps n over [1, 2] (outer) and m over [1, 4] (inner) at k == 1,
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6181
// Sweeps m over [1, 4] with n fixed at 2 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
  }
}
6196
// Sweeps n over [1, 2] with m fixed at 4 and k == 1; iterations(1) per run.
TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(n_val).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
  }
}
6211
// Sweeps k over [2, 9] with m == 4 and n == 2.
TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_val)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
  }
}
6225
// Sweeps k over [2, 9], n over [1, 2] and m over [1, 4] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6244
// Runs n == 3 (the loop covers [3, 4), i.e. a single value) and
// k in {1, 3, 5} with m fixed at 4.
TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6260
// Runs n == 3 (the loop covers [3, 4), i.e. a single value) and
// k in {1, 3, 5} with m == 4 and cn_stride(5).
TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6277
// Runs n == 3 (the loop covers [3, 4)), k in {1, 3, 5} and m in [1, 4]
// (outer to inner), with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6296
// Runs n in {4, 6} and k in {1, 3, 5} with m fixed at 4.
TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6312
// Runs n in {4, 6} and k in {1, 3, 5} with m == 4 and cn_stride(5).
TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6329
// Runs n in {4, 6}, k in {1, 3, 5} and m in [1, 4] (outer to inner),
// with iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6348
// Runs k in {1, 3, 5} with m == 4, n == 2 and ks(3).
TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_val)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
  }
}
6363
// Runs k in {1, 3, 5}, n in [1, 2] and m in [1, 4] (outer to inner),
// with ks(3) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6383
// Runs n == 3 (the loop covers [3, 4), i.e. a single value) and
// k in {1, 3, 5} with m == 4 and ks(3).
TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6400
// Runs n in {4, 6} and k in {1, 3, 5} with m == 4 and ks(3).
TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6417
// Runs k in {1, 3, 5}, n in [1, 2] and m in [1, 4] (outer to inner),
// with cm_stride(5) and iterations(1) for each configuration.
TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
6437
// m=4, n=2, ks=3, a_offset=23; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X2__WASM, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
  }
}
6453
// m=4, n=2, ks=3, a_offset=23; k in {1, 3, 5}, zero_index in [0, 3].
TEST(F32_IGEMM_RELU_4X2__WASM, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
    }
  }
}
6472
// Single invocation with m=4, n=2, k=1 and cm_stride=5.
TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
}
6485 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6486
6487
6488 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single invocation with m=4, n=4, k=1.
TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
}
6500
// Single invocation with m=4, n=4, k=1 and cn_stride=7.
TEST(F32_IGEMM_RELU_4X4__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
}
6513
// k=1, one iteration per case; n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6530
// n=4, k=1, one iteration per case; m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
  }
}
6545
// m=4, k=1, one iteration per case; n in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
  }
}
6560
// m=4, n=4; k in [2, 9].
TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
  }
}
6574
// One iteration per case; k in [2, 9], n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
6593
// m=4; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6609
// m=4, cn_stride=7; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6626
// One iteration per case; n in [5, 7], k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
6645
// m=4; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6661
// m=4, cn_stride=7; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6678
// One iteration per case; n in {8, 12}, k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
6697
// m=4, n=4, ks=3; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
  }
}
6712
// ks=3, one iteration per case; k in {1, 3, 5}, n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
6732
// m=4, ks=3; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6749
// m=4, ks=3; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6766
// cm_stride=7, one iteration per case; k in {1, 3, 5}, n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
6786
// m=4, n=4, ks=3, a_offset=23; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X4__WASM, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
  }
}
6802
// m=4, n=4, ks=3, a_offset=23; k in {1, 3, 5}, zero_index in [0, 3].
TEST(F32_IGEMM_RELU_4X4__WASM, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
    }
  }
}
6821
// Single invocation with m=4, n=4, k=1 and cm_stride=7.
TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
}
6834 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6835
6836
// Single invocation with m=1, n=4, k=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
}
6848
// Single invocation with m=1, n=4, k=1 and cn_stride=7.
TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
}
6861
// k=1, one iteration per case; n in [1, 4], the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
6878
// n=4, k=1, one iteration per case; the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
  }
}
6893
// m=1, k=1, one iteration per case; n in [1, 4].
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
  }
}
6908
// m=1, n=4; k in [2, 9].
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
  }
}
6922
// One iteration per case; k in [2, 9], n in [1, 4], the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
6941
// m=1; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
6957
// m=1, cn_stride=7; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
6974
// One iteration per case; n in [5, 7], k in {1, 3, 5}, the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
6993
// m=1; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
7009
// m=1, cn_stride=7; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
7026
// One iteration per case; n in {8, 12}, k in {1, 3, 5}, the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
7045
// m=1, n=4, ks=3; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
  }
}
7060
// ks=3, one iteration per case; k in {1, 3, 5}, n in [1, 4], the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
7080
// m=1, ks=3; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
7097
// m=1, ks=3; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
7114
// cm_stride=7, one iteration per case; k in {1, 3, 5}, n in [1, 4], the m loop covers only m=1.
TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
7134
// m=1, n=4, ks=3, a_offset=7; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_1X4__SCALAR, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
  }
}
7150
// m=1, n=4, ks=3, a_offset=7; k in {1, 3, 5}, the zero_index loop covers only 0.
TEST(F32_IGEMM_RELU_1X4__SCALAR, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
    }
  }
}
7169
// Single invocation with m=1, n=4, k=1 and cm_stride=7.
TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
}
7182
7183
// Single invocation with m=4, n=2, k=1.
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
}
7195
// Single invocation with m=4, n=2, k=1 and cn_stride=5.
TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
}
7208
// k=1, one iteration per case; n in [1, 2], m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7225
// n=2, k=1, one iteration per case; m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
  }
}
7240
// m=4, k=1, one iteration per case; n in [1, 2].
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
  }
}
7255
// m=4, n=2; k in [2, 9].
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
  }
}
7269
// One iteration per case; k in [2, 9], n in [1, 2], m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
7288
// m=4; the n loop covers only n=3, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7304
// m=4, cn_stride=5; the n loop covers only n=3, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7321
// One iteration per case; the n loop covers only n=3, k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
7340
// m=4; n in {4, 6}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7356
// m=4, cn_stride=5; n in {4, 6}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7373
// One iteration per case; n in {4, 6}, k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
7392
// m=4, n=2, ks=3; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
  }
}
7407
// ks=3, one iteration per case; k in {1, 3, 5}, n in [1, 2], m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
7427
// m=4, ks=3; the n loop covers only n=3, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_small_kernel) {
  for (uint32_t nc = 3; nc < 4; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7444
// m=4, ks=3; n in {4, 6}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_small_kernel) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7461
// cm_stride=5, one iteration per case; k in {1, 3, 5}, n in [1, 2], m in [1, 4].
TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
7481
// m=4, n=2, ks=3, a_offset=23; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X2__SCALAR, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
  }
}
7497
// m=4, n=2, ks=3, a_offset=23; k in {1, 3, 5}, zero_index in [0, 3].
TEST(F32_IGEMM_RELU_4X2__SCALAR, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
    }
  }
}
7516
// Single invocation with m=4, n=2, k=1 and cm_stride=5.
TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cm_stride(5)
    .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
}
7529
7530
// Single invocation with m=4, n=4, k=1.
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
}
7542
// Single invocation with m=4, n=4, k=1 and cn_stride=7.
TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
}
7555
// k=1, one iteration per case; n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7572
// n=4, k=1, one iteration per case; m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(mc).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
  }
}
7587
// m=4, k=1, one iteration per case; n in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
  }
}
7602
// m=4, n=4; k in [2, 9].
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
  }
}
7616
// One iteration per case; k in [2, 9], n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
7635
// m=4; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7651
// m=4, cn_stride=7; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7668
// One iteration per case; n in [5, 7], k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
7687
// m=4; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7703
// m=4, cn_stride=7; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7720
// One iteration per case; n in {8, 12}, k in {1, 3, 5}, m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
7739
// m=4, n=4, ks=3; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
  }
}
7754
// ks=3, one iteration per case; k in {1, 3, 5}, n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
7774
// m=4, ks=3; n in [5, 7], k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7791
// m=4, ks=3; n in {8, 12}, k in {1, 3, 5}.
TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7808
// cm_stride=7, one iteration per case; k in {1, 3, 5}, n in [1, 4], m in [1, 4].
TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
7828
// m=4, n=4, ks=3, a_offset=23; k takes the values 1, 3, 5.
TEST(F32_IGEMM_RELU_4X4__SCALAR, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
  }
}
7844
// m=4, n=4, ks=3, a_offset=23; k in {1, 3, 5}, zero_index in [0, 3].
TEST(F32_IGEMM_RELU_4X4__SCALAR, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
    }
  }
}
7863
// Single invocation with m=4, n=4, k=1 and cm_stride=7.
TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
}
7876