1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/f32-vsigmoid.yaml
8 // Generator: tools/generate-vunary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18
19
20 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x4 kernel with batch_size 4.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
27
// Invokes the tester for batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
36
// Invokes the tester for every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
45
// Invokes the tester for every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
54
// Invokes the tester with inplace(true) for batch sizes 1, 4, 7, ..., 19 (step 3, up to 20).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
64 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
65
66
67 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x8 kernel with batch_size 8.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 8;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
74
// Invokes the tester for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
83
// Invokes the tester for every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
92
// Invokes the tester for every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
101
// Invokes the tester with inplace(true) for batch sizes 1, 8, 15, ..., 36 (step 7, up to 40).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
111 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
112
113
114 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x12 kernel with batch_size 12.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 12;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
121
// Invokes the tester for batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
130
// Invokes the tester for every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
139
// Invokes the tester for every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
148
// Invokes the tester with inplace(true) for batch sizes 1, 12, 23, ..., 56 (step 11, up to 60).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
158 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
159
160
161 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x16 kernel with batch_size 16.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 16;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
168
// Invokes the tester for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
177
// Invokes the tester for every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
186
// Invokes the tester for every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
195
// Invokes the tester with inplace(true) for batch sizes 1, 16, 31, ..., 76 (step 15, up to 80).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
205 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
206
207
208 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x20 kernel with batch_size 20.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 20;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
215
// Invokes the tester for batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
224
// Invokes the tester for every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
233
// Invokes the tester for every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
242
// Invokes the tester with inplace(true) for batch sizes 1, 20, 39, ..., 96 (step 19, up to 100).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
252 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
253
254
255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut64_p2_nr2recps_x24 kernel with batch_size 24.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 24;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
      xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
}
262
// Invokes the tester for batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
271
// Invokes the tester for every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
280
// Invokes the tester for every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
289
// Invokes the tester with inplace(true) for batch sizes 1, 24, 47, ..., 116 (step 23, up to 120).
TEST(F32_VSIGMOID__NEON_RR2_LUT64_P2_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params);
  }
}
299 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
300
301
302 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x4 kernel with batch_size 4.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
309
// Invokes the tester for batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
318
// Invokes the tester for every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
327
// Invokes the tester for every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
336
// Invokes the tester with inplace(true) for batch sizes 1, 4, 7, ..., 19 (step 3, up to 20).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
346 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
347
348
349 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x8 kernel with batch_size 8.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 8;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
356
// Invokes the tester for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
365
// Invokes the tester for every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
374
// Invokes the tester for every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
383
// Invokes the tester with inplace(true) for batch sizes 1, 8, 15, ..., 36 (step 7, up to 40).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
393 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
394
395
396 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x12 kernel with batch_size 12.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 12;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
403
// Invokes the tester for batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
412
// Invokes the tester for every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
421
// Invokes the tester for every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
430
// Invokes the tester with inplace(true) for batch sizes 1, 12, 23, ..., 56 (step 11, up to 60).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
440 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
441
442
443 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x16 kernel with batch_size 16.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 16;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
450
// Invokes the tester for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
459
// Invokes the tester for every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
468
// Invokes the tester for every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
477
// Invokes the tester with inplace(true) for batch sizes 1, 16, 31, ..., 76 (step 15, up to 80).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
487 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
488
489
490 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x20 kernel with batch_size 20.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 20;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
497
// Invokes the tester for batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
506
// Invokes the tester for every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
515
// Invokes the tester for every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
524
// Invokes the tester with inplace(true) for batch sizes 1, 20, 39, ..., 96 (step 19, up to 100).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
534 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
535
536
537 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_lut2048_p1_nr2recps_x24 kernel with batch_size 24.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 24;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
      xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
}
544
// Invokes the tester for batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
553
// Invokes the tester for every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
562
// Invokes the tester for every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
571
// Invokes the tester with inplace(true) for batch sizes 1, 24, 47, ..., 116 (step 23, up to 120).
TEST(F32_VSIGMOID__NEON_RR2_LUT2048_P1_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params);
  }
}
581 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
582
583
584 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_p5_nr2recps_x4 kernel with batch_size 4.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
591
// Invokes the tester for batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
600
// Invokes the tester for every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
609
// Invokes the tester for every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
618
// Invokes the tester with inplace(true) for batch sizes 1, 4, 7, ..., 19 (step 3, up to 20).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x4,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
628 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
629
630
631 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_p5_nr2recps_x8 kernel with batch_size 8.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 8;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
638
// Invokes the tester for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
647
// Invokes the tester for every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
656
// Invokes the tester for every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
665
// Invokes the tester with inplace(true) for batch sizes 1, 8, 15, ..., 36 (step 7, up to 40).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
675 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
676
677
678 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_p5_nr2recps_x12 kernel with batch_size 12.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 12;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
685
// Invokes the tester for batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
694
// Invokes the tester for every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
703
// Invokes the tester for every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
712
// Invokes the tester with inplace(true) for batch sizes 1, 12, 23, ..., 56 (step 11, up to 60).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x12,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
722 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
723
724
725 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Invokes the tester once for the neon_rr2_p5_nr2recps_x16 kernel with batch_size 16.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 16;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
732
// Covers batch sizes that are multiples of 16, from 32 up to 144.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t tile = 16;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
741
// Covers every batch size smaller than 16 (1 through 15).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
750
// Covers every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
759
// Runs with the tester's inplace(true) option for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const size_t stride = 15;
  for (size_t n = 1; n <= 80; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x16,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
769 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
770
771
772 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 20.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 20;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
779
// Covers batch sizes that are multiples of 20, from 40 up to 180.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  const size_t tile = 20;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
788
// Covers every batch size smaller than 20 (1 through 19).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
797
// Covers every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
806
// Runs with the tester's inplace(true) option for batch sizes
// 1, 20, 39, 58, 77 and 96.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const size_t stride = 19;
  for (size_t n = 1; n <= 100; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
816 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
817
818
819 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  const size_t n = 24;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
      xnn_init_f32_sigmoid_neon_rr2_p5_params);
}
826
// Covers batch sizes that are multiples of 24, from 48 up to 216.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  const size_t tile = 24;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
835
// Covers every batch size smaller than 24 (1 through 23).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
844
// Covers every batch size strictly between 24 and 48 (25 through 47).
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
853
// Runs with the tester's inplace(true) option for batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VSIGMOID__NEON_RR2_P5_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const size_t stride = 23;
  for (size_t n = 1; n <= 120; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neon_rr2_p5_params);
  }
}
863 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
864
865
866 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
873
// Covers batch sizes that are multiples of 4, from 8 up to 36.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 4;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
882
// Covers every batch size smaller than 4 (1 through 3).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
891
// Covers every batch size strictly between 4 and 8 (5 through 7).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
900
// Runs with the tester's inplace(true) option for batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 3;
  for (size_t n = 1; n <= 20; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
910 #endif // XNN_ARCH_ARM64
911
912
913 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 8;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
920
// Covers batch sizes that are multiples of 8, from 16 up to 72.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 8;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
929
// Covers every batch size smaller than 8 (1 through 7).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
938
// Covers every batch size strictly between 8 and 16 (9 through 15).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
947
// Runs with the tester's inplace(true) option for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 7;
  for (size_t n = 1; n <= 40; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
957 #endif // XNN_ARCH_ARM64
958
959
960 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 12;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
967
// Covers batch sizes that are multiples of 12, from 24 up to 108.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 12;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
976
// Covers every batch size smaller than 12 (1 through 11).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 11; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
985
// Covers every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
994
// Runs with the tester's inplace(true) option for batch sizes
// 1, 12, 23, 34, 45 and 56.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 11;
  for (size_t n = 1; n <= 60; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1004 #endif // XNN_ARCH_ARM64
1005
1006
1007 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 16;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1014
// Covers batch sizes that are multiples of 16, from 32 up to 144.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 16;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1023
// Covers every batch size smaller than 16 (1 through 15).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1032
// Covers every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1041
// Runs with the tester's inplace(true) option for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 15;
  for (size_t n = 1; n <= 80; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1051 #endif // XNN_ARCH_ARM64
1052
1053
1054 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 20;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1061
// Covers batch sizes that are multiples of 20, from 40 up to 180.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 20;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1070
// Covers every batch size smaller than 20 (1 through 19).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1079
// Covers every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1088
// Runs with the tester's inplace(true) option for batch sizes
// 1, 20, 39, 58, 77 and 96.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 19;
  for (size_t n = 1; n <= 100; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1098 #endif // XNN_ARCH_ARM64
1099
1100
1101 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 24;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1108
// Covers batch sizes that are multiples of 24, from 48 up to 216.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 24;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1117
// Covers every batch size smaller than 24 (1 through 23).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1126
// Covers every batch size strictly between 24 and 48 (25 through 47).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1135
// Runs with the tester's inplace(true) option for batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_DIV_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 23;
  for (size_t n = 1; n <= 120; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1145 #endif // XNN_ARCH_ARM64
1146
1147
1148 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1155
// Covers batch sizes that are multiples of 4, from 8 up to 36.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 4;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1164
// Covers every batch size smaller than 4 (1 through 3).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1173
// Covers every batch size strictly between 4 and 8 (5 through 7).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1182
// Runs with the tester's inplace(true) option for batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 3;
  for (size_t n = 1; n <= 20; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1192 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1193
1194
1195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 8;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1202
// Covers batch sizes that are multiples of 8, from 16 up to 72.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 8;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1211
// Covers every batch size smaller than 8 (1 through 7).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1220
// Covers every batch size strictly between 8 and 16 (9 through 15).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1229
// Runs with the tester's inplace(true) option for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 7;
  for (size_t n = 1; n <= 40; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1239 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1240
1241
1242 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 12;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1249
// Covers batch sizes that are multiples of 12, from 24 up to 108.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 12;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1258
// Covers every batch size smaller than 12 (1 through 11).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 11; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1267
// Covers every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1276
// Runs with the tester's inplace(true) option for batch sizes
// 1, 12, 23, 34, 45 and 56.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 11;
  for (size_t n = 1; n <= 60; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1286 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1287
1288
1289 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 16;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1296
// Covers batch sizes that are multiples of 16, from 32 up to 144.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 16;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1305
// Covers every batch size smaller than 16 (1 through 15).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1314
// Covers every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1323
// Runs with the tester's inplace(true) option for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 15;
  for (size_t n = 1; n <= 80; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1333 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1334
1335
1336 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 20;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1343
// Covers batch sizes that are multiples of 20, from 40 up to 180.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 20;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1352
// Covers every batch size smaller than 20 (1 through 19).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1361
// Covers every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1370
// Runs with the tester's inplace(true) option for batch sizes
// 1, 20, 39, 58, 77 and 96.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 19;
  for (size_t n = 1; n <= 100; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1380 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1381
1382
1383 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 24;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1390
// Covers batch sizes that are multiples of 24, from 48 up to 216.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t tile = 24;
  for (size_t n = 2 * tile; n < 10 * tile; n += tile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1399
// Covers every batch size smaller than 24 (1 through 23).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1408
// Covers every batch size strictly between 24 and 48 (25 through 47).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1417
// Runs with the tester's inplace(true) option for batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR1RECPS1FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t stride = 23;
  for (size_t n = 1; n <= 120; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1427 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1428
1429
1430 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t n = 4;
  VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1437
// Sweeps batch_size over 8, 12, ..., 36 (start 8, step 4, stop before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1446
// Sweeps batch_size over every value from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1455
// Sweeps batch_size over every value from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1464
// Sweeps batch_size from 1 up to at most 20 in steps of 3, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1474 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1475
1476
1477 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1484
// Sweeps batch_size over 16, 24, ..., 72 (start 16, step 8, stop before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1493
// Sweeps batch_size over every value from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1502
// Sweeps batch_size over every value from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1511
// Sweeps batch_size from 1 up to at most 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1521 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1522
1523
1524 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1531
// Sweeps batch_size over 24, 36, ..., 108 (start 24, step 12, stop before 120).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1540
// Sweeps batch_size over every value from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1549
// Sweeps batch_size over every value from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1558
// Sweeps batch_size from 1 up to at most 60 in steps of 11, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1568 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1569
1570
1571 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1578
// Sweeps batch_size over 32, 48, ..., 144 (start 32, step 16, stop before 160).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1587
// Sweeps batch_size over every value from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1596
// Sweeps batch_size over every value from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1605
// Sweeps batch_size from 1 up to at most 80 in steps of 15, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1615 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1616
1617
1618 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1625
// Sweeps batch_size over 40, 60, ..., 180 (start 40, step 20, stop before 200).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1634
// Sweeps batch_size over every value from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1643
// Sweeps batch_size over every value from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1652
// Sweeps batch_size from 1 up to at most 100 in steps of 19, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1662 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1663
1664
1665 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1672
// Sweeps batch_size over 48, 72, ..., 216 (start 48, step 24, stop before 240).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1681
// Sweeps batch_size over every value from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1690
// Sweeps batch_size over every value from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1699
// Sweeps batch_size from 1 up to at most 120 in steps of 23, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1709 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1710
1711
1712 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1719
// Sweeps batch_size over 8, 12, ..., 36 (start 8, step 4, stop before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1728
// Sweeps batch_size over every value from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1737
// Sweeps batch_size over every value from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1746
// Sweeps batch_size from 1 up to at most 20 in steps of 3, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1756 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1757
1758
1759 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1766
// Sweeps batch_size over 16, 24, ..., 72 (start 16, step 8, stop before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1775
// Sweeps batch_size over every value from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1784
// Sweeps batch_size over every value from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1793
// Sweeps batch_size from 1 up to at most 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1803 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1804
1805
1806 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1813
// Sweeps batch_size over 24, 36, ..., 108 (start 24, step 12, stop before 120).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1822
// Sweeps batch_size over every value from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1831
// Sweeps batch_size over every value from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1840
// Sweeps batch_size from 1 up to at most 60 in steps of 11, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1850 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1851
1852
1853 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1860
// Sweeps batch_size over 32, 48, ..., 144 (start 32, step 16, stop before 160).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1869
// Sweeps batch_size over every value from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1878
// Sweeps batch_size over every value from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1887
// Sweeps batch_size from 1 up to at most 80 in steps of 15, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1897 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1898
1899
1900 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1907
// Sweeps batch_size over 40, 60, ..., 180 (start 40, step 20, stop before 200).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1916
// Sweeps batch_size over every value from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1925
// Sweeps batch_size over every value from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1934
// Sweeps batch_size from 1 up to at most 100 in steps of 19, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1944 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1945
1946
1947 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size == 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
    xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
}
1954
// Sweeps batch_size over 48, 72, ..., 216 (start 48, step 24, stop before 240).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1963
// Sweeps batch_size over every value from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1972
// Sweeps batch_size over every value from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1981
// Sweeps batch_size from 1 up to at most 120 in steps of 23, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT64_P2_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params);
  }
}
1991 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1992
1993
1994 #if XNN_ARCH_ARM64
// Single run with batch_size == 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2001
// Sweeps batch_size over 8, 12, ..., 36 (start 8, step 4, stop before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2010
// Sweeps batch_size over every value from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2019
// Sweeps batch_size over every value from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2028
// Sweeps batch_size from 1 up to at most 20 in steps of 3, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2038 #endif // XNN_ARCH_ARM64
2039
2040
2041 #if XNN_ARCH_ARM64
// Single run with batch_size == 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2048
// Sweeps batch_size over 16, 24, ..., 72 (start 16, step 8, stop before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2057
// Sweeps batch_size over every value from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2066
// Sweeps batch_size over every value from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2075
// Sweeps batch_size from 1 up to at most 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2085 #endif // XNN_ARCH_ARM64
2086
2087
2088 #if XNN_ARCH_ARM64
// Single run with batch_size == 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2095
// Sweeps batch_size over 24, 36, ..., 108 (start 24, step 12, stop before 120).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2104
// Sweeps batch_size over every value from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2113
// Sweeps batch_size over every value from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2122
// Sweeps batch sizes 1, 12, 23, ..., 56 (step 11, at most 60) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2132 #endif // XNN_ARCH_ARM64
2133
2134
2135 #if XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2142
// Sweeps batch sizes 32, 48, ..., 144: multiples of 16 from 2*16 up to, but excluding, 10*16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2151
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2160
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2169
// Sweeps batch sizes 1, 16, 31, ..., 76 (step 15, at most 80) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2179 #endif // XNN_ARCH_ARM64
2180
2181
2182 #if XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2189
// Sweeps batch sizes 40, 60, ..., 180: multiples of 20 from 2*20 up to, but excluding, 10*20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2198
// Sweeps every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2207
// Sweeps every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2216
// Sweeps batch sizes 1, 20, 39, ..., 96 (step 19, at most 100) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2226 #endif // XNN_ARCH_ARM64
2227
2228
2229 #if XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2236
// Sweeps batch sizes 48, 72, ..., 216: multiples of 24 from 2*24 up to, but excluding, 10*24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2245
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2254
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2263
// Sweeps batch sizes 1, 24, 47, ..., 116 (step 23, at most 120) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_DIV_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2273 #endif // XNN_ARCH_ARM64
2274
2275
2276 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2283
// Sweeps batch sizes 8, 12, ..., 36: multiples of 4 from 2*4 up to, but excluding, 10*4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2292
// Sweeps every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2301
// Sweeps every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2310
// Sweeps batch sizes 1, 4, 7, ..., 19 (step 3, at most 20) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2321
2322
2323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2330
// Sweeps batch sizes 16, 24, ..., 72: multiples of 8 from 2*8 up to, but excluding, 10*8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2339
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2348
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2357
// Sweeps batch sizes 1, 8, 15, ..., 36 (step 7, at most 40) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2367 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2368
2369
2370 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2377
// Sweeps batch sizes 24, 36, ..., 108: multiples of 12 from 2*12 up to, but excluding, 10*12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2386
// Sweeps every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2395
// Sweeps every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2404
// Sweeps batch sizes 1, 12, 23, ..., 56 (step 11, at most 60) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2414 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2415
2416
2417 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2424
// Sweeps batch sizes 32, 48, ..., 144: multiples of 16 from 2*16 up to, but excluding, 10*16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2433
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2442
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2451
// Sweeps batch sizes 1, 16, 31, ..., 76 (step 15, at most 80) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2461 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2462
2463
2464 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2471
// Sweeps batch sizes 40, 60, ..., 180: multiples of 20 from 2*20 up to, but excluding, 10*20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2480
// Sweeps every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2489
// Sweeps every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2498
// Sweeps batch sizes 1, 20, 39, ..., 96 (step 19, at most 100) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2508 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2509
2510
2511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2518
// Sweeps batch sizes 48, 72, ..., 216: multiples of 24 from 2*24 up to, but excluding, 10*24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2527
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2536
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2545
// Sweeps batch sizes 1, 24, 47, ..., 116 (step 23, at most 120) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR1RECPS1FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 24;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2555 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2556
2557
2558 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2565
// Sweeps batch sizes 8, 12, ..., 36: multiples of 4 from 2*4 up to, but excluding, 10*4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2574
// Sweeps every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2583
// Sweeps every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2592
// Sweeps batch sizes 1, 4, 7, ..., 19 (step 3, at most 20) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 4;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2602 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2603
2604
2605 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2612
// Sweeps batch sizes 16, 24, ..., 72: multiples of 8 from 2*8 up to, but excluding, 10*8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2621
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2630
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2639
// Sweeps batch sizes 1, 8, 15, ..., 36 (step 7, at most 40) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 8;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2649 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2650
2651
2652 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2659
// Sweeps batch sizes 24, 36, ..., 108: multiples of 12 from 2*12 up to, but excluding, 10*12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2668
// Sweeps every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2677
// Sweeps every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2686
// Sweeps batch sizes 1, 12, 23, ..., 56 (step 11, at most 60) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 12;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2696 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2697
2698
2699 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2706
// Sweeps batch sizes 32, 48, ..., 144: multiples of 16 from 2*16 up to, but excluding, 10*16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2715
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2724
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2733
// Sweeps batch sizes 1, 16, 31, ..., 76 (step 15, at most 80) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 16;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2743 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2744
2745
2746 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size set to exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
    xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2753
// Sweeps batch sizes 40, 60, ..., 180: multiples of 20 from 2*20 up to, but excluding, 10*20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 2 * kBase; n < 10 * kBase; n += kBase) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2762
// Sweeps every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n < kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2771
// Sweeps every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2780
// Sweeps batch sizes 1, 20, 39, ..., 96 (step 19, at most 100) with the tester's inplace(true) option set.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const size_t kBase = 20;
  for (size_t n = 1; n <= 5 * kBase; n += kBase - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2790 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2791
2792
2793 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2800
// Sweeps batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2809
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2818
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2827
// Sweeps batch sizes 1, 24, 47, ... (step 23, at most 120) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2837 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2838
2839
2840 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2847
// Sweeps batch sizes 8, 12, ..., 36 (step 4, stopping before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2856
// Sweeps every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2865
// Sweeps every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2874
// Sweeps batch sizes 1, 4, 7, ... (step 3, at most 20) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2884 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2885
2886
2887 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2894
// Sweeps batch sizes 16, 24, ..., 72 (step 8, stopping before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2903
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2912
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2921
// Sweeps batch sizes 1, 8, 15, ... (step 7, at most 40) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2931 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2932
2933
2934 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2941
// Sweeps batch sizes 24, 36, ..., 108 (step 12, stopping before 120).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2950
// Sweeps every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2959
// Sweeps every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2968
// Sweeps batch sizes 1, 12, 23, ... (step 11, at most 60) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2978 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2979
2980
2981 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
2988
// Sweeps batch sizes 32, 48, ..., 144 (step 16, stopping before 160).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
2997
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3006
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3015
// Sweeps batch sizes 1, 16, 31, ... (step 15, at most 80) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3025 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3026
3027
3028 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
3035
// Sweeps batch sizes 40, 60, ..., 180 (step 20, stopping before 200).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3044
// Sweeps every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3053
// Sweeps every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3062
// Sweeps batch sizes 1, 20, 39, ... (step 19, at most 100) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3072 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3073
3074
3075 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
}
3082
// Sweeps batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3091
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3100
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3109
// Sweeps batch sizes 1, 24, 47, ... (step 23, at most 120) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_LUT2048_P1_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params);
  }
}
3119 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3120
3121
3122 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3129
// Sweeps batch sizes 8, 12, ..., 36 (step 4, stopping before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3138
// Sweeps every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3147
// Sweeps every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3156
// Sweeps batch sizes 1, 4, 7, ... (step 3, at most 20) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3166 #endif // XNN_ARCH_ARM64
3167
3168
3169 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3176
// Sweeps batch sizes 16, 24, ..., 72 (step 8, stopping before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3185
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3194
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3203
// Sweeps batch sizes 1, 8, 15, ... (step 7, at most 40) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3213 #endif // XNN_ARCH_ARM64
3214
3215
3216 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3223
// Sweeps batch sizes 24, 36, ..., 108 (step 12, stopping before 120).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3232
// Sweeps every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3241
// Sweeps every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3250
// Sweeps batch sizes 1, 12, 23, ... (step 11, at most 60) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3260 #endif // XNN_ARCH_ARM64
3261
3262
3263 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3270
// Sweeps batch sizes 32, 48, ..., 144 (step 16, stopping before 160).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3279
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3288
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3297
// Sweeps batch sizes 1, 16, 31, ... (step 15, at most 80) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3307 #endif // XNN_ARCH_ARM64
3308
3309
3310 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3317
// Sweeps batch sizes 40, 60, ..., 180 (step 20, stopping before 200).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3326
// Sweeps every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3335
// Sweeps every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3344
// Sweeps batch sizes 1, 20, 39, ... (step 19, at most 100) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3354 #endif // XNN_ARCH_ARM64
3355
3356
3357 #if XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3364
// Sweeps batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3373
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3382
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3391
// Sweeps batch sizes 1, 24, 47, ... (step 23, at most 120) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_DIV_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3401 #endif // XNN_ARCH_ARM64
3402
3403
3404 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3411
// Sweeps batch sizes 8, 12, ..., 36 (step 4, stopping before 40).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3420
// Sweeps every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3429
// Sweeps every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3438
// Sweeps batch sizes 1, 4, 7, ... (step 3, at most 20) with inplace(true)
// set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x4,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3449
3450
3451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with a batch size of exactly 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3458
// Sweeps batch sizes 16, 24, ..., 72 (step 8, stopping before 80).
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3467
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3476
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3485
// Batch sizes 1, 8, ..., 36 (step 7, upper bound 40), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3495 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3496
3497
3498 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3505
// Batch sizes 24, 36, ..., 108: multiples of 12 from 24 up to (excluding) 120.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3514
// Every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3523
// Every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3532
// Batch sizes 1, 12, ..., 56 (step 11, upper bound 60), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3542 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3543
3544
3545 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3552
// Batch sizes 32, 48, ..., 144: multiples of 16 from 32 up to (excluding) 160.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3561
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3570
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3579
// Batch sizes 1, 16, ..., 76 (step 15, upper bound 80), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3589 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3590
3591
3592 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3599
// Batch sizes 40, 60, ..., 180: multiples of 20 from 40 up to (excluding) 200.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3608
// Every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3617
// Every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3626
// Batch sizes 1, 20, ..., 96 (step 19, upper bound 100), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3636 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3637
3638
3639 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3646
// Batch sizes 48, 72, ..., 216: multiples of 24 from 48 up to (excluding) 240.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3655
// Every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3664
// Every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3673
// Batch sizes 1, 24, ..., 116 (step 23, upper bound 120), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR1RECPS1FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3683 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3684
3685
3686 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3693
// Batch sizes 8, 12, ..., 36: multiples of 4 from 8 up to (excluding) 40.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3702
// Every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3711
// Every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3720
// Batch sizes 1, 4, ..., 19 (step 3, upper bound 20), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3730 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3731
3732
3733 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3740
// Batch sizes 16, 24, ..., 72: multiples of 8 from 16 up to (excluding) 80.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3749
// Every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3758
// Every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3767
// Batch sizes 1, 8, ..., 36 (step 7, upper bound 40), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3777 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3778
3779
3780 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3787
// Batch sizes 24, 36, ..., 108: multiples of 12 from 24 up to (excluding) 120.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3796
// Every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3805
// Every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3814
// Batch sizes 1, 12, ..., 56 (step 11, upper bound 60), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3824 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3825
3826
3827 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3834
// Batch sizes 32, 48, ..., 144: multiples of 16 from 32 up to (excluding) 160.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3843
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3852
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3861
// Batch sizes 1, 16, ..., 76 (step 15, upper bound 80), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3871 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3872
3873
3874 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3881
// Batch sizes 40, 60, ..., 180: multiples of 20 from 40 up to (excluding) 200.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3890
// Every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3899
// Every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3908
// Batch sizes 1, 20, ..., 96 (step 19, upper bound 100), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3918 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3919
3920
3921 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 24.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3928
// Batch sizes 48, 72, ..., 216: multiples of 24 from 48 up to (excluding) 240.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3937
// Every batch size from 1 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3946
// Every batch size from 25 through 47.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3955
// Batch sizes 1, 24, ..., 116 (step 23, upper bound 120), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2FMA_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3965 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3966
3967
3968 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 4.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
3975
// Batch sizes 8, 12, ..., 36: multiples of 4 from 8 up to (excluding) 40.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3984
// Every batch size from 1 through 3.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
3993
// Every batch size from 5 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4002
// Batch sizes 1, 4, ..., 19 (step 3, upper bound 20), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x4, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4012 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4013
4014
4015 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 8.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
4022
// Batch sizes 16, 24, ..., 72: multiples of 8 from 16 up to (excluding) 80.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4031
// Every batch size from 1 through 7.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4040
// Every batch size from 9 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4049
// Batch sizes 1, 8, ..., 36 (step 7, upper bound 40), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4059 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4060
4061
4062 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 12.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
4069
// Batch sizes 24, 36, ..., 108: multiples of 12 from 24 up to (excluding) 120.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4078
// Every batch size from 1 through 11.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4087
// Every batch size from 13 through 23.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4096
// Batch sizes 1, 12, ..., 56 (step 11, upper bound 60), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4106 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4107
4108
4109 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 16.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
4116
// Batch sizes 32, 48, ..., 144: multiples of 16 from 32 up to (excluding) 160.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4125
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4134
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4143
// Batch sizes 1, 16, ..., 76 (step 15, upper bound 80), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4153 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4154
4155
4156 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single Test() call with the batch size set to 20.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
4163
// Batch sizes 40, 60, ..., 180: multiples of 20 from 40 up to (excluding) 200.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4172
// Every batch size from 1 through 19.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4181
// Every batch size from 21 through 39.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4190
// Batch sizes 1, 20, ..., 96 (step 19, upper bound 100), each with inplace(true) set on the tester.
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    // A fresh tester is built for every batch size.
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20, xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4200 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4201
4202
4203 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // One run with the batch size set to exactly 24.
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
      xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
}
4210
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Batch sizes 48, 72, ..., 216: multiples of 24 from 2x up to (not including) 10x.
  const size_t kTile = 24;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4219
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Every batch size from 1 through 23.
  const size_t kTile = 24;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4228
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Every batch size from 25 through 47.
  const size_t kTile = 24;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4237
TEST(F32_VSIGMOID__NEONFMA_RR1_P5_NR2RECPS_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  // Batch sizes 1, 24, 47, ..., 116 (step 23, capped at 120) with the tester's inplace option on.
  const size_t kTile = 24;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24,
        xnn_init_f32_sigmoid_neonfma_rr1_p5_params);
  }
}
4247 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4248
4249
4250 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 4.
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4257
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 8, 12, ..., 36: multiples of 4 from 2x up to (not including) 10x.
  const size_t kTile = 4;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4266
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 3.
  const size_t kTile = 4;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4275
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 5 through 7.
  const size_t kTile = 4;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4284
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 4, 7, ..., 19 (step 3, capped at 20) with the tester's inplace option on.
  const size_t kTile = 4;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4294 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4295
4296
4297 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 8.
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4304
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 16, 24, ..., 72: multiples of 8 from 2x up to (not including) 10x.
  const size_t kTile = 8;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4313
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 7.
  const size_t kTile = 8;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4322
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 9 through 15.
  const size_t kTile = 8;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4331
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, ..., 36 (step 7, capped at 40) with the tester's inplace option on.
  const size_t kTile = 8;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4341 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4342
4343
4344 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 12.
  const size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4351
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 24, 36, ..., 108: multiples of 12 from 2x up to (not including) 10x.
  const size_t kTile = 12;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4360
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 11.
  const size_t kTile = 12;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4369
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 13 through 23.
  const size_t kTile = 12;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4378
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 12, 23, ..., 56 (step 11, capped at 60) with the tester's inplace option on.
  const size_t kTile = 12;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4388 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4389
4390
4391 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 16.
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4398
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 32, 48, ..., 144: multiples of 16 from 2x up to (not including) 10x.
  const size_t kTile = 16;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4407
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 15.
  const size_t kTile = 16;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4416
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 17 through 31.
  const size_t kTile = 16;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4425
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, ..., 76 (step 15, capped at 80) with the tester's inplace option on.
  const size_t kTile = 16;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4435 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4436
4437
4438 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 20.
  const size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4445
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 40, 60, ..., 180: multiples of 20 from 2x up to (not including) 10x.
  const size_t kTile = 20;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4454
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 19.
  const size_t kTile = 20;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4463
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 21 through 39.
  const size_t kTile = 20;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4472
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 20, 39, ..., 96 (step 19, capped at 100) with the tester's inplace option on.
  const size_t kTile = 20;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4482 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4483
4484
4485 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 24.
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4492
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 48, 72, ..., 216: multiples of 24 from 2x up to (not including) 10x.
  const size_t kTile = 24;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4501
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 23.
  const size_t kTile = 24;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4510
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 25 through 47.
  const size_t kTile = 24;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4519
TEST(F32_VSIGMOID__SSE2_RR2_LUT64_P2_DIV_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 24, 47, ..., 116 (step 23, capped at 120) with the tester's inplace option on.
  const size_t kTile = 24;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4529 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4530
4531
4532 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 4.
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4539
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 8, 12, ..., 36: multiples of 4 from 2x up to (not including) 10x.
  const size_t kTile = 4;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4548
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 3.
  const size_t kTile = 4;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4557
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 5 through 7.
  const size_t kTile = 4;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4566
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 4, 7, ..., 19 (step 3, capped at 20) with the tester's inplace option on.
  const size_t kTile = 4;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4576 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4577
4578
4579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 8.
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4586
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 16, 24, ..., 72: multiples of 8 from 2x up to (not including) 10x.
  const size_t kTile = 8;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4595
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 7.
  const size_t kTile = 8;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4604
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 9 through 15.
  const size_t kTile = 8;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4613
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, ..., 36 (step 7, capped at 40) with the tester's inplace option on.
  const size_t kTile = 8;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4623 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4624
4625
4626 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 12.
  const size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4633
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 24, 36, ..., 108: multiples of 12 from 2x up to (not including) 10x.
  const size_t kTile = 12;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4642
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 11.
  const size_t kTile = 12;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4651
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 13 through 23.
  const size_t kTile = 12;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4660
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 12, 23, ..., 56 (step 11, capped at 60) with the tester's inplace option on.
  const size_t kTile = 12;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4670 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4671
4672
4673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 16.
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4680
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 32, 48, ..., 144: multiples of 16 from 2x up to (not including) 10x.
  const size_t kTile = 16;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4689
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 15.
  const size_t kTile = 16;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4698
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 17 through 31.
  const size_t kTile = 16;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4707
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, ..., 76 (step 15, capped at 80) with the tester's inplace option on.
  const size_t kTile = 16;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x16,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4717 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4718
4719
4720 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 20.
  const size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4727
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 40, 60, ..., 180: multiples of 20 from 2x up to (not including) 10x.
  const size_t kTile = 20;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4736
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 19.
  const size_t kTile = 20;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4745
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 21 through 39.
  const size_t kTile = 20;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4754
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 20, 39, ..., 96 (step 19, capped at 100) with the tester's inplace option on.
  const size_t kTile = 20;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4764 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4765
4766
4767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  // One run with the batch size set to exactly 24.
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
4774
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 48, 72, ..., 216: multiples of 24 from 2x up to (not including) 10x.
  const size_t kTile = 24;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4783
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 1 through 23.
  const size_t kTile = 24;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4792
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  // Every batch size from 25 through 47.
  const size_t kTile = 24;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4801
TEST(F32_VSIGMOID__SSE2_RR2_P5_DIV_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 24, 47, ..., 116 (step 23, capped at 120) with the tester's inplace option on.
  const size_t kTile = 24;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24,
        xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
4811 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4812
4813
4814 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  // One run with the batch size set to exactly 4.
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4821
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 8, 12, ..., 36: multiples of 4 from 2x up to (not including) 10x.
  const size_t kTile = 4;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4830
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 1 through 3.
  const size_t kTile = 4;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4839
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 5 through 7.
  const size_t kTile = 4;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4848
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 4, 7, ..., 19 (step 3, capped at 20) with the tester's inplace option on.
  const size_t kTile = 4;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4858 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4859
4860
4861 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // One run with the batch size set to exactly 8.
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4868
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 16, 24, ..., 72: multiples of 8 from 2x up to (not including) 10x.
  const size_t kTile = 8;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4877
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 1 through 7.
  const size_t kTile = 8;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4886
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 9 through 15.
  const size_t kTile = 8;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4895
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, ..., 36 (step 7, capped at 40) with the tester's inplace option on.
  const size_t kTile = 8;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4905 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4906
4907
4908 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  // One run with the batch size set to exactly 12.
  const size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4915
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 24, 36, ..., 108: multiples of 12 from 2x up to (not including) 10x.
  const size_t kTile = 12;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4924
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 1 through 11.
  const size_t kTile = 12;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4933
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 13 through 23.
  const size_t kTile = 12;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
        xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4942
// Batch sizes start at 1 and advance by 11 while not exceeding 60; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 11;
  for (size_t n = 1; n <= 60; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4952 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4953
4954
4955 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 16.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(16).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
    xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
4962
// Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
            xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4971
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4980
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4989
// Batch sizes start at 1 and advance by 15 while not exceeding 80; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 15;
  for (size_t n = 1; n <= 80; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
4999 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5000
5001
5002 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 20.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(20).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
    xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
5009
// Batch sizes that are whole multiples of 20: 40, 60, ..., 180.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 20)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
            xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5018
// Every batch size from 1 through 19.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5027
// Every batch size from 21 through 39.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5036
// Batch sizes start at 1 and advance by 19 while not exceeding 100; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 19;
  for (size_t n = 1; n <= 100; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5046 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5047
5048
5049 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 24.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(24).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
    xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
}
5056
// Batch sizes that are whole multiples of 24: 48, 72, ..., 216.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
            xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5065
// Every batch size from 1 through 23.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5074
// Every batch size from 25 through 47.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5083
// Batch sizes start at 1 and advance by 23 while not exceeding 120; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_LUT64_P2_DIV_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 23;
  for (size_t n = 1; n <= 120; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params);
  }
}
5093 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5094
5095
5096 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 4.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(4).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5103
// Batch sizes that are whole multiples of 4: 8, 12, ..., 36.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 4)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5112
// Every batch size from 1 through 3.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5121
// Every batch size from 5 through 7.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 5; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5130
// Batch sizes start at 1 and advance by 3 while not exceeding 20; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 3;
  for (size_t n = 1; n <= 20; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x4,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5140 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5141
5142
5143 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 8.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(8).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5150
// Batch sizes that are whole multiples of 8: 16, 24, ..., 72.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5159
// Every batch size from 1 through 7.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5168
// Every batch size from 9 through 15.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5177
// Batch sizes start at 1 and advance by 7 while not exceeding 40; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 7;
  for (size_t n = 1; n <= 40; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5187 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5188
5189
5190 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 12.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(12).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5197
// Batch sizes that are whole multiples of 12: 24, 36, ..., 108.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 12)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5206
// Every batch size from 1 through 11.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 11; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5215
// Every batch size from 13 through 23.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 13; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5224
// Batch sizes start at 1 and advance by 11 while not exceeding 60; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 11;
  for (size_t n = 1; n <= 60; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5234 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5235
5236
5237 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 16.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(16).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5244
// Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5253
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5262
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5271
// Batch sizes start at 1 and advance by 15 while not exceeding 80; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 15;
  for (size_t n = 1; n <= 80; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5281 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5282
5283
5284 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 20.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(20).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5291
// Batch sizes that are whole multiples of 20: 40, 60, ..., 180.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 20)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5300
// Every batch size from 1 through 19.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5309
// Every batch size from 21 through 39.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5318
// Batch sizes start at 1 and advance by 19 while not exceeding 100; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 19;
  for (size_t n = 1; n <= 100; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5328 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5329
5330
5331 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 24.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester().batch_size(24).Test(
    xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
    xnn_init_f32_sigmoid_sse2_rr2_p5_params);
}
5338
// Batch sizes that are whole multiples of 24: 48, 72, ..., 216.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
            xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5347
// Every batch size from 1 through 23.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5356
// Every batch size from 25 through 47.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5365
// Batch sizes start at 1 and advance by 23 while not exceeding 120; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__SSE41_RR2_P5_DIV_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kStep = 23;
  for (size_t n = 1; n <= 120; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_sse2_rr2_p5_params);
  }
}
5375 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5376
5377
5378 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 8.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(8).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5385
// Batch sizes that are whole multiples of 8: 16, 24, ..., 72.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5394
// Every batch size from 1 through 7.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5403
// Every batch size from 9 through 15.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5412
// Batch sizes start at 1 and advance by 7 while not exceeding 40; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 7;
  for (size_t n = 1; n <= 40; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x8,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5422 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5423
5424
5425 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 16.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(16).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5432
// Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5441
// Every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5450
// Every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5459
// Batch sizes start at 1 and advance by 15 while not exceeding 80; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 15;
  for (size_t n = 1; n <= 80; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x16,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5469 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5470
5471
5472 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 24.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(24).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5479
// Batch sizes that are whole multiples of 24: 48, 72, ..., 216.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5488
// Every batch size from 1 through 23.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5497
// Every batch size from 25 through 47.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5506
// Batch sizes start at 1 and advance by 23 while not exceeding 120; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 23;
  for (size_t n = 1; n <= 120; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x24,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5516 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5517
5518
5519 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 32.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(32).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5526
// Batch sizes that are whole multiples of 32: 64, 96, ..., 288.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 32)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5535
// Every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 31; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5544
// Every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n <= 63; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5553
// Batch sizes start at 1 and advance by 31 while not exceeding 160; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 31;
  for (size_t n = 1; n <= 160; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x32,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5563 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5564
5565
5566 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 40.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(40).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5573
// Batch sizes that are whole multiples of 40: 80, 120, ..., 360.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 40)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5582
// Every batch size from 1 through 39.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 39; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5591
// Every batch size from 41 through 79.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n <= 79; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5600
// Batch sizes start at 1 and advance by 39 while not exceeding 200; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 39;
  for (size_t n = 1; n <= 200; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5610 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5611
5612
5613 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 48.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(48).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5620
// Batch sizes that are whole multiples of 48: 96, 144, ..., 432.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 48)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5629
// Every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 47; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5638
// Every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n <= 95; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5647
// Batch sizes start at 1 and advance by 47 while not exceeding 240; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 47;
  for (size_t n = 1; n <= 240; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5657 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5658
5659
5660 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 56.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(56).Test(
    xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
    xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5667
// Batch sizes that are whole multiples of 56: 112, 168, ..., 504.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 56)
      .Test(xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
            xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5676
// Every batch size from 1 through 55.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 55; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5685
// Every batch size from 57 through 111.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 57; n <= 111; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5694
// Batch sizes start at 1 and advance by 55 while not exceeding 280; every run
// has inplace(true) set on the tester.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X56, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 55;
  for (size_t n = 1; n <= 280; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5704 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5705
5706
5707 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 64.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(64).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5714
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 128, 192, ..., 576
// (step 64, stopping before 640).
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5723
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5732
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5741
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 63 while not exceeding 320.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X64, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5751 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5752
5753
5754 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 72.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(72).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5761
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 144, 216, ..., 648
// (step 72, stopping before 720).
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5770
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 71.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5779
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 73 through 143.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5788
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 71 while not exceeding 360.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X72, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5798 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5799
5800
5801 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 80.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(80).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5808
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 160, 240, ..., 720
// (step 80, stopping before 800).
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5817
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5826
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5835
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 79 while not exceeding 400.
TEST(F32_VSIGMOID__AVX_RR2_P5_DIV_X80, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5845 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5846
5847
5848 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 8.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5855
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 16, 24, ..., 72
// (step 8, stopping before 80).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5864
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 7.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5873
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 9 through 15.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5882
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x8,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5892 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5893
5894
5895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 16.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5902
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 32, 48, ..., 144
// (step 16, stopping before 160).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5911
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5920
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5929
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x16,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5939 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5940
5941
5942 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 24.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5949
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 48, 72, ..., 216
// (step 24, stopping before 240).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5958
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 23.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5967
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 25 through 47.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5976
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x24,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
5986 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5987
5988
5989 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 32.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
5996
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 64, 96, ..., 288
// (step 32, stopping before 320).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6005
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6014
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6023
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 31 while not exceeding 160.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x32,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6033 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6034
6035
6036 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 40.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(40).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6043
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 80, 120, ..., 360
// (step 40, stopping before 400).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6052
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 39.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6061
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 41 through 79.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6070
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6080 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6081
6082
6083 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 48.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6090
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 96, 144, ..., 432
// (step 48, stopping before 480).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6099
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6108
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6117
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x48,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6127 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6128
6129
6130 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 56.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(56).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6137
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 112, 168, ..., 504
// (step 56, stopping before 560).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6146
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 55.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6155
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 57 through 111.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6164
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 55 while not exceeding 280.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X56, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x56,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6174 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6175
6176
6177 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 64.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(64).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6184
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 128, 192, ..., 576
// (step 64, stopping before 640).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6193
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6202
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6211
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 63 while not exceeding 320.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X64, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x64,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6221 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6222
6223
6224 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 72.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(72).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6231
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 144, 216, ..., 648
// (step 72, stopping before 720).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6240
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 71.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6249
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 73 through 143.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6258
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 71 while not exceeding 360.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X72, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x72,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6268 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6269
6270
6271 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 80.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(80).Test(
      xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
      xnn_init_f32_sigmoid_avx_rr2_p5_params);
}
6278
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 160, 240, ..., 720
// (step 80, stopping before 800).
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6287
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6296
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6305
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 79 while not exceeding 400.
TEST(F32_VSIGMOID__AVX_RR2_P5_NR2_X80, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x80,
        xnn_init_f32_sigmoid_avx_rr2_p5_params);
  }
}
6315 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6316
6317
6318 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 8.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6325
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 16, 24, ..., 72
// (step 8, stopping before 80).
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6334
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 7.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6343
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 9 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6352
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6362 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6363
6364
6365 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 16.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6372
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 32, 48, ..., 144
// (step 16, stopping before 160).
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6381
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6390
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6399
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6409 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6410
6411
6412 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 24.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6419
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 48, 72, ..., 216
// (step 24, stopping before 240).
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6428
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 1 through 23.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6437
// Drives the kernel through VUnaryMicrokernelTester for every batch size from 25 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6446
// Drives the kernel through VUnaryMicrokernelTester with inplace(true), for batch sizes
// starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6456 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6457
6458
6459 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Drives the kernel through VUnaryMicrokernelTester with a batch size of exactly 32.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6466
// Drives the kernel through VUnaryMicrokernelTester for batch sizes 64, 96, ..., 288
// (step 32, stopping before 320).
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6475
// Sweeps every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6484
// Sweeps every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6493
// Sweeps batch sizes from 1 to at most 160 in steps of 31 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6503 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6504
6505
6506 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 40 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6513
// Sweeps batch sizes from 80 up to (not including) 400 in steps of 40.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6522
// Sweeps every batch size from 1 through 39.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6531
// Sweeps every batch size from 41 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6540
// Sweeps batch sizes from 1 to at most 200 in steps of 39 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6550 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6551
6552
6553 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 48 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6560
// Sweeps batch sizes from 96 up to (not including) 480 in steps of 48.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6569
// Sweeps every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6578
// Sweeps every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6587
// Sweeps batch sizes from 1 to at most 240 in steps of 47 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6597 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6598
6599
6600 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 56 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(56);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6607
// Sweeps batch sizes from 112 up to (not including) 560 in steps of 56.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6616
// Sweeps every batch size from 1 through 55.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6625
// Sweeps every batch size from 57 through 111.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6634
// Sweeps batch sizes from 1 to at most 280 in steps of 55 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6644 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6645
6646
6647 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 64 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6654
// Sweeps batch sizes from 128 up to (not including) 640 in steps of 64.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6663
// Sweeps every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6672
// Sweeps every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6681
// Sweeps batch sizes from 1 to at most 320 in steps of 63 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6691 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6692
6693
6694 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 72 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(72);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6701
// Sweeps batch sizes from 144 up to (not including) 720 in steps of 72.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6710
// Sweeps every batch size from 1 through 71.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6719
// Sweeps every batch size from 73 through 143.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6728
// Sweeps batch sizes from 1 to at most 360 in steps of 71 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6738 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6739
6740
6741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 80 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6748
// Sweeps batch sizes from 160 up to (not including) 800 in steps of 80.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6757
// Sweeps every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6766
// Sweeps every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6775
// Sweeps batch sizes from 1 to at most 400 in steps of 79 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_DIV_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6785 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6786
6787
6788 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 8 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6795
// Sweeps batch sizes from 16 up to (not including) 80 in steps of 8.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6804
// Sweeps every batch size from 1 through 7.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6813
// Sweeps every batch size from 9 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6822
// Sweeps batch sizes from 1 to at most 40 in steps of 7 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x8, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6832 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6833
6834
6835 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 16 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6842
// Sweeps batch sizes from 32 up to (not including) 160 in steps of 16.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6851
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6860
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6869
// Sweeps batch sizes from 1 to at most 80 in steps of 15 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x16, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6879 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6880
6881
6882 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 24 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6889
// Sweeps batch sizes from 48 up to (not including) 240 in steps of 24.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6898
// Sweeps every batch size from 1 through 23.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6907
// Sweeps every batch size from 25 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6916
// Sweeps batch sizes from 1 to at most 120 in steps of 23 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x24, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6926 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6927
6928
6929 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 32 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6936
// Sweeps batch sizes from 64 up to (not including) 320 in steps of 32.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6945
// Sweeps every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6954
// Sweeps every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6963
// Sweeps batch sizes from 1 to at most 160 in steps of 31 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6973 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6974
6975
6976 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 40 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
6983
// Sweeps batch sizes from 80 up to (not including) 400 in steps of 40.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
6992
// Sweeps every batch size from 1 through 39.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7001
// Sweeps every batch size from 41 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7010
// Sweeps batch sizes from 1 to at most 200 in steps of 39 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7020 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7021
7022
7023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 48 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7030
// Sweeps batch sizes from 96 up to (not including) 480 in steps of 48.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7039
// Sweeps every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7048
// Sweeps every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7057
// Sweeps batch sizes from 1 to at most 240 in steps of 47 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7067 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7068
7069
7070 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 56 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(56);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7077
// Sweeps batch sizes from 112 up to (not including) 560 in steps of 56.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7086
// Sweeps every batch size from 1 through 55.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7095
// Sweeps every batch size from 57 through 111.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7104
// Sweeps batch sizes from 1 to at most 280 in steps of 55 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7114 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7115
7116
7117 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 64 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7124
// Sweeps batch sizes from 128 up to (not including) 640 in steps of 64.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7133
// Sweeps every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7142
// Sweeps every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7151
// Sweeps batch sizes from 1 to at most 320 in steps of 63 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7161 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7162
7163
7164 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 72 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(72);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7171
// Sweeps batch sizes from 144 up to (not including) 720 in steps of 72.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7180
// Sweeps every batch size from 1 through 71.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7189
// Sweeps every batch size from 73 through 143.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7198
// Sweeps batch sizes from 1 to at most 360 in steps of 71 with inplace(true) set.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x72, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7208 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7209
7210
7211 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once with a batch of 80 elements.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7218
// Sweeps batch sizes from 160 up to (not including) 800 in steps of 80.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7227
// Sweeps every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80, xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7236
// Covers every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7245
// Runs with inplace(true), stepping the batch size from 1 by 79 while it is
// at most 400.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR1FMA_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7255 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7256
7257
7258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 8.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7265
// Sweeps the multiples of 8 from 16 through 72.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 8;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7274
// Covers every batch size from 1 through 7.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7283
// Covers every batch size from 9 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7292
// Runs with inplace(true), stepping the batch size from 1 by 7 while it is
// at most 40.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x8,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7302 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7303
7304
7305 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 16.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7312
// Sweeps the multiples of 16 from 32 through 144.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 16;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7321
// Covers every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7330
// Covers every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7339
// Runs with inplace(true), stepping the batch size from 1 by 15 while it is
// at most 80.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x16,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7349 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7350
7351
7352 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 24.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7359
// Sweeps the multiples of 24 from 48 through 216.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 24;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7368
// Covers every batch size from 1 through 23.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7377
// Covers every batch size from 25 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7386
// Runs with inplace(true), stepping the batch size from 1 by 23 while it is
// at most 120.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x24,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7396 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7397
7398
7399 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 32.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 32;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7406
// Sweeps the multiples of 32 from 64 through 288.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 32;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7415
// Covers every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7424
// Covers every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7433
// Runs with inplace(true), stepping the batch size from 1 by 31 while it is
// at most 160.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7443 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7444
7445
7446 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 40.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 40;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7453
// Sweeps the multiples of 40 from 80 through 360.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 40;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7462
// Covers every batch size from 1 through 39.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7471
// Covers every batch size from 41 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7480
// Runs with inplace(true), stepping the batch size from 1 by 39 while it is
// at most 200.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7490 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7491
7492
7493 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 48.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 48;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7500
// Sweeps the multiples of 48 from 96 through 432.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 48;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7509
// Covers every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7518
// Covers every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7527
// Runs with inplace(true), stepping the batch size from 1 by 47 while it is
// at most 240.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7537 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7538
7539
7540 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 56.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 56;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7547
// Sweeps the multiples of 56 from 112 through 504.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 56;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7556
// Covers every batch size from 1 through 55.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7565
// Covers every batch size from 57 through 111.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7574
// Runs with inplace(true), stepping the batch size from 1 by 55 while it is
// at most 280.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7584 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7585
7586
7587 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 64.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 64;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7594
// Sweeps the multiples of 64 from 128 through 576.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 64;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7603
// Covers every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7612
// Covers every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7621
// Runs with inplace(true), stepping the batch size from 1 by 63 while it is
// at most 320.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x64,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7631 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7632
7633
7634 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 72.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 72;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7641
// Sweeps the multiples of 72 from 144 through 648.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 72;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7650
// Covers every batch size from 1 through 71.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7659
// Covers every batch size from 73 through 143.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7668
// Runs with inplace(true), stepping the batch size from 1 by 71 while it is
// at most 360.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x72,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7678 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7679
7680
7681 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 80.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kBatch = 80;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
      xnn_init_f32_sigmoid_avx2_rr1_p5_params);
}
7688
// Sweeps the multiples of 80 from 160 through 720.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kStep = 80;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7697
// Covers every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7706
// Covers every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7715
// Runs with inplace(true), stepping the batch size from 1 by 79 while it is
// at most 400.
TEST(F32_VSIGMOID__AVX2_RR1_P5_NR2FMA_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x80,
        xnn_init_f32_sigmoid_avx2_rr1_p5_params);
  }
}
7725 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7726
7727
7728 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 16.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7735
// Sweeps the multiples of 16 from 32 through 144.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kStep = 16;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7744
// Covers every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7753
// Covers every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7762
// Runs with inplace(true), stepping the batch size from 1 by 15 while it is
// at most 80.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7772 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7773
7774
7775 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 32.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kBatch = 32;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7782
// Sweeps the multiples of 32 from 64 through 288.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kStep = 32;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7791
// Covers every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7800
// Covers every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7809
// Runs with inplace(true), stepping the batch size from 1 by 31 while it is
// at most 160.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7819 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7820
7821
7822 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 48.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kBatch = 48;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7829
// Sweeps the multiples of 48 from 96 through 432.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kStep = 48;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7838
// Covers every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7847
// Covers every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7856
// Runs with inplace(true), stepping the batch size from 1 by 47 while it is
// at most 240.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7866 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7867
7868
7869 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 64.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kBatch = 64;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7876
// Sweeps the multiples of 64 from 128 through 576.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kStep = 64;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7885
// Covers every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7894
// Covers every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7903
// Runs with inplace(true), stepping the batch size from 1 by 63 while it is
// at most 320.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7913 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7914
7915
7916 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the batch size set to exactly 80.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kBatch = 80;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7923
// Sweeps the multiples of 80 from 160 through 720.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t kStep = 80;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7932
// Covers every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7941
// Covers every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
  }
}
7950
// Runs the x80 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 80, 159, ... (step 79) while the size is <= 400.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 400) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 79;
  }
}
7960 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7961
7962
7963 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x96 DIV kernel with batch_size = 96.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 96;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
7970
// Sweeps batch sizes 192, 288, ... in steps of 96, stopping before 960.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 192;
  while (n < 960) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 96;
  }
}
7979
// Sweeps every batch size from 1 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 96) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
7988
// Sweeps every batch size from 97 through 191.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 97;
  while (n < 192) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
7997
// Runs the x96 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 96, 191, ... (step 95) while the size is <= 480.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 480) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 95;
  }
}
8007 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8008
8009
8010 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x112 DIV kernel with batch_size = 112.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 112;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8017
// Sweeps batch sizes 224, 336, ... in steps of 112, stopping before 1120.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 224;
  while (n < 1120) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 112;
  }
}
8026
// Sweeps every batch size from 1 through 111.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 112) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8035
// Sweeps every batch size from 113 through 223.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 113;
  while (n < 224) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8044
// Runs the x112 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 112, 223, ... (step 111) while the size is <= 560.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 560) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 111;
  }
}
8054 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8055
8056
8057 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x128 DIV kernel with batch_size = 128.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 128;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8064
// Sweeps batch sizes 256, 384, ... in steps of 128, stopping before 1280.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 256;
  while (n < 1280) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 128;
  }
}
8073
// Sweeps every batch size from 1 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 128) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8082
// Sweeps every batch size from 129 through 255.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 129;
  while (n < 256) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8091
// Runs the x128 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 128, 255, ... (step 127) while the size is <= 640.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_DIV_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 640) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 127;
  }
}
8101 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8102
8103
8104 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x16 NR1FMA kernel with batch_size = 16.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 16;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8111
// Sweeps batch sizes 32, 48, ... in steps of 16, stopping before 160.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 32;
  while (n < 160) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 16;
  }
}
8120
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8129
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 17;
  while (n < 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8138
// Runs the x16 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 16, 31, ... (step 15) while the size is <= 80.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 80) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 15;
  }
}
8148 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8149
8150
8151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x32 NR1FMA kernel with batch_size = 32.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 32;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8158
// Sweeps batch sizes 64, 96, ... in steps of 32, stopping before 320.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 64;
  while (n < 320) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 32;
  }
}
8167
// Sweeps every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8176
// Sweeps every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 33;
  while (n < 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8185
// Runs the x32 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 32, 63, ... (step 31) while the size is <= 160.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 160) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 31;
  }
}
8195 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8196
8197
8198 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x48 NR1FMA kernel with batch_size = 48.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 48;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8205
// Sweeps batch sizes 96, 144, ... in steps of 48, stopping before 480.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 96;
  while (n < 480) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 48;
  }
}
8214
// Sweeps every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8223
// Sweeps every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 49;
  while (n < 96) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8232
// Runs the x48 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 48, 95, ... (step 47) while the size is <= 240.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 240) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 47;
  }
}
8242 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8243
8244
8245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x64 NR1FMA kernel with batch_size = 64.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 64;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8252
// Sweeps batch sizes 128, 192, ... in steps of 64, stopping before 640.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 128;
  while (n < 640) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 64;
  }
}
8261
// Sweeps every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8270
// Sweeps every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 65;
  while (n < 128) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8279
// Runs the x64 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 64, 127, ... (step 63) while the size is <= 320.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 320) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 63;
  }
}
8289 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8290
8291
8292 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x80 NR1FMA kernel with batch_size = 80.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 80;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8299
// Sweeps batch sizes 160, 240, ... in steps of 80, stopping before 800.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 160;
  while (n < 800) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 80;
  }
}
8308
// Sweeps every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8317
// Sweeps every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 81;
  while (n < 160) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8326
// Runs the x80 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 80, 159, ... (step 79) while the size is <= 400.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 400) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 79;
  }
}
8336 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8337
8338
8339 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x96 NR1FMA kernel with batch_size = 96.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 96;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8346
// Sweeps batch sizes 192, 288, ... in steps of 96, stopping before 960.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 192;
  while (n < 960) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 96;
  }
}
8355
// Sweeps every batch size from 1 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 96) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8364
// Sweeps every batch size from 97 through 191.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 97;
  while (n < 192) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8373
// Runs the x96 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 96, 191, ... (step 95) while the size is <= 480.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 480) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 95;
  }
}
8383 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8384
8385
8386 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x112 NR1FMA kernel with batch_size = 112.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 112;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8393
// Sweeps batch sizes 224, 336, ... in steps of 112, stopping before 1120.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 224;
  while (n < 1120) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 112;
  }
}
8402
// Sweeps every batch size from 1 through 111.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 112) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8411
// Sweeps every batch size from 113 through 223.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 113;
  while (n < 224) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8420
// Runs the x112 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 112, 223, ... (step 111) while the size is <= 560.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 560) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 111;
  }
}
8430 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8431
8432
8433 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x128 NR1FMA kernel with batch_size = 128.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 128;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
      xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
}
8440
// Sweeps batch sizes 256, 384, ... in steps of 128, stopping before 1280.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 256;
  while (n < 1280) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 128;
  }
}
8449
// Sweeps every batch size from 1 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 128) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8458
// Sweeps every batch size from 129 through 255.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 129;
  while (n < 256) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    ++n;
  }
}
8467
// Runs the x128 NR1FMA kernel with inplace(true) set on the tester for
// batch sizes 1, 128, 255, ... (step 127) while the size is <= 640.
TEST(F32_VSIGMOID__AVX512F_RR1_LUT16_P3_PERM_SCALEF_NR1FMA_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 640) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128,
        xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params);
    n += 127;
  }
}
8477 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8478
8479
8480 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x16 P5 DIV kernel with batch_size = 16.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 16;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
      xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8487
// Sweeps batch sizes 32, 48, ... in steps of 16, stopping before 160.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 32;
  while (n < 160) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    n += 16;
  }
}
8496
// Sweeps every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    ++n;
  }
}
8505
// Sweeps every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 17;
  while (n < 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    ++n;
  }
}
8514
// Runs the x16 P5 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 16, 31, ... (step 15) while the size is <= 80.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 80) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    n += 15;
  }
}
8524 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8525
8526
8527 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x32 P5 DIV kernel with batch_size = 32.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 32;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8534
// Sweeps batch sizes 64, 96, ... in steps of 32, stopping before 320.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 64;
  while (n < 320) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    n += 32;
  }
}
8543
// Sweeps every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n < 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    ++n;
  }
}
8552
// Sweeps every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 33;
  while (n < 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    ++n;
  }
}
8561
// Runs the x32 P5 DIV kernel with inplace(true) set on the tester for
// batch sizes 1, 32, 63, ... (step 31) while the size is <= 160.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 1;
  while (n <= 160) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    n += 31;
  }
}
8571 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8572
8573
8574 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x48 P5 DIV kernel with batch_size = 48.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  const size_t batch = 48;
  VUnaryMicrokernelTester().batch_size(batch).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8581
// Sweeps batch sizes 96, 144, ... in steps of 48, stopping before 480.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  size_t n = 96;
  while (n < 480) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48,
        xnn_init_f32_sigmoid_avx512_rr1_p5_params);
    n += 48;
  }
}
8590
// Exercises the kernel with every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8599
// Exercises the kernel with every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8608
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 47 while they do not exceed 240.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8618 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8619
8620
8621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 64.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8628
// Exercises the kernel with batch sizes 128, 192, ... (step 64), stopping
// before 640.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8637
// Exercises the kernel with every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8646
// Exercises the kernel with every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8655
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 63 while they do not exceed 320.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8665 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8666
8667
8668 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 80.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8675
// Exercises the kernel with batch sizes 160, 240, ... (step 80), stopping
// before 800.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8684
// Exercises the kernel with every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8693
// Exercises the kernel with every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8702
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 79 while they do not exceed 400.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8712 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8713
8714
8715 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 96.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(96);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8722
// Exercises the kernel with batch sizes 192, 288, ... (step 96), stopping
// before 960.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 192; n < 960; n += 96) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8731
// Exercises the kernel with every batch size from 1 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8740
// Exercises the kernel with every batch size from 97 through 191.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8749
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 95 while they do not exceed 480.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8759 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8760
8761
8762 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 112.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(112);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8769
// Exercises the kernel with batch sizes 224, 336, ... (step 112), stopping
// before 1120.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 224; n < 1120; n += 112) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8778
// Exercises the kernel with every batch size from 1 through 111.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8787
// Exercises the kernel with every batch size from 113 through 223.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8796
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 111 while they do not exceed 560.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8806 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8807
8808
8809 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 128.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(128);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8816
// Exercises the kernel with batch sizes 256, 384, ... (step 128), stopping
// before 1280.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 256; n < 1280; n += 128) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8825
// Exercises the kernel with every batch size from 1 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8834
// Exercises the kernel with every batch size from 129 through 255.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8843
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 127 while they do not exceed 640.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_DIV_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8853 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8854
8855
8856 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 16.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8863
// Exercises the kernel with batch sizes 32, 48, ... (step 16), stopping
// before 160.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8872
// Exercises the kernel with every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8881
// Exercises the kernel with every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8890
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 15 while they do not exceed 80.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8900 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8901
8902
8903 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 32.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8910
// Exercises the kernel with batch sizes 64, 96, ... (step 32), stopping
// before 320.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8919
// Exercises the kernel with every batch size from 1 through 31.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8928
// Exercises the kernel with every batch size from 33 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8937
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 31 while they do not exceed 160.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8947 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8948
8949
8950 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 48.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
8957
// Exercises the kernel with batch sizes 96, 144, ... (step 48), stopping
// before 480.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8966
// Exercises the kernel with every batch size from 1 through 47.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8975
// Exercises the kernel with every batch size from 49 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8984
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 47 while they do not exceed 240.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
8994 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8995
8996
8997 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 64.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
9004
// Exercises the kernel with batch sizes 128, 192, ... (step 64), stopping
// before 640.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9013
// Exercises the kernel with every batch size from 1 through 63.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9022
// Exercises the kernel with every batch size from 65 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9031
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 63 while they do not exceed 320.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9041 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9042
9043
9044 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 80.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
9051
// Exercises the kernel with batch sizes 160, 240, ... (step 80), stopping
// before 800.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9060
// Exercises the kernel with every batch size from 1 through 79.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9069
// Exercises the kernel with every batch size from 81 through 159.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9078
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 79 while they do not exceed 400.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9088 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9089
9090
9091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 96.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(96);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
9098
// Exercises the kernel with batch sizes 192, 288, ... (step 96), stopping
// before 960.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 192; n < 960; n += 96) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9107
// Exercises the kernel with every batch size from 1 through 95.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9116
// Exercises the kernel with every batch size from 97 through 191.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9125
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 95 while they do not exceed 480.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9135 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9136
9137
9138 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 112.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(112);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
9145
// Exercises the kernel with batch sizes 224, 336, ... (step 112), stopping
// before 1120.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 224; n < 1120; n += 112) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9154
// Exercises the kernel with every batch size from 1 through 111.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9163
// Exercises the kernel with every batch size from 113 through 223.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9172
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 111 while they do not exceed 560.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9182 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9183
9184
9185 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 128.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(128);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
}
9192
// Exercises the kernel with batch sizes 256, 384, ... (step 128), stopping
// before 1280.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 256; n < 1280; n += 128) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9201
// Exercises the kernel with every batch size from 1 through 127.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9210
// Exercises the kernel with every batch size from 129 through 255.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9219
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 127 while they do not exceed 640.
TEST(F32_VSIGMOID__AVX512F_RR1_P5_SCALEF_NR1FMA_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128, xnn_init_f32_sigmoid_avx512_rr1_p5_params);
  }
}
9229 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9230
9231
9232 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel with a single batch size of 16.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16, xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9239
// Exercises the kernel with batch sizes 32, 48, ... (step 16), stopping
// before 160.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16, xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9248
// Exercises the kernel with every batch size from 1 through 15.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16, xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9257
// Exercises the kernel with every batch size from 17 through 31.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16, xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9266
// Exercises the kernel with the tester's inplace option enabled, for batch
// sizes starting at 1 and advancing by 15 while they do not exceed 80.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16, xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9276 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9277
9278
9279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X32, batch_eq_32) {
  // One run at batch_size == 32, matching the x32 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(32).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9286
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X32, batch_div_32) {
  // Batch sizes 64, 96, ..., 288: every multiple of 32 in [64, 320).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 64;
  while (multiple < 320) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 32;
  }
}
9295
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X32, batch_lt_32) {
  // Every batch size below 32: 1 through 31.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9304
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X32, batch_gt_32) {
  // Every batch size strictly between 32 and 64: 33 through 63.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9313
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X32, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9323 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9324
9325
9326 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X48, batch_eq_48) {
  // One run at batch_size == 48, matching the x48 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(48).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9333
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X48, batch_div_48) {
  // Batch sizes 96, 144, ..., 432: every multiple of 48 in [96, 480).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 96;
  while (multiple < 480) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 48;
  }
}
9342
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X48, batch_lt_48) {
  // Every batch size below 48: 1 through 47.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9351
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X48, batch_gt_48) {
  // Every batch size strictly between 48 and 96: 49 through 95.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9360
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X48, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 48, 95, 142, 189, 236.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9370 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9371
9372
9373 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X64, batch_eq_64) {
  // One run at batch_size == 64, matching the x64 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(64).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9380
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X64, batch_div_64) {
  // Batch sizes 128, 192, ..., 576: every multiple of 64 in [128, 640).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 128;
  while (multiple < 640) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 64;
  }
}
9389
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X64, batch_lt_64) {
  // Every batch size below 64: 1 through 63.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9398
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X64, batch_gt_64) {
  // Every batch size strictly between 64 and 128: 65 through 127.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9407
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X64, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 64, 127, 190, 253, 316.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9417 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9418
9419
9420 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X80, batch_eq_80) {
  // One run at batch_size == 80, matching the x80 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(80).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9427
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X80, batch_div_80) {
  // Batch sizes 160, 240, ..., 720: every multiple of 80 in [160, 800).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 160;
  while (multiple < 800) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 80;
  }
}
9436
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X80, batch_lt_80) {
  // Every batch size below 80: 1 through 79.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9445
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X80, batch_gt_80) {
  // Every batch size strictly between 80 and 160: 81 through 159.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9454
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X80, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 80, 159, 238, 317, 396.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9464 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9465
9466
9467 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X96, batch_eq_96) {
  // One run at batch_size == 96, matching the x96 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(96).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9474
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X96, batch_div_96) {
  // Batch sizes 192, 288, ..., 864: every multiple of 96 in [192, 960).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 192;
  while (multiple < 960) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 96;
  }
}
9483
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X96, batch_lt_96) {
  // Every batch size below 96: 1 through 95.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9492
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X96, batch_gt_96) {
  // Every batch size strictly between 96 and 192: 97 through 191.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9501
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X96, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 96, 191, 286, 381, 476.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9511 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9512
9513
9514 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X112, batch_eq_112) {
  // One run at batch_size == 112, matching the x112 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(112).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9521
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X112, batch_div_112) {
  // Batch sizes 224, 336, ..., 1008: every multiple of 112 in [224, 1120).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 224;
  while (multiple < 1120) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 112;
  }
}
9530
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X112, batch_lt_112) {
  // Every batch size below 112: 1 through 111.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9539
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X112, batch_gt_112) {
  // Every batch size strictly between 112 and 224: 113 through 223.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9548
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X112, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 112, 223, 334, 445, 556.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9558 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9559
9560
9561 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X128, batch_eq_128) {
  // One run at batch_size == 128, matching the x128 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(128).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9568
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X128, batch_div_128) {
  // Batch sizes 256, 384, ..., 1152: every multiple of 128 in [256, 1280).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 256;
  while (multiple < 1280) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 128;
  }
}
9577
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X128, batch_lt_128) {
  // Every batch size below 128: 1 through 127.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9586
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X128, batch_gt_128) {
  // Every batch size strictly between 128 and 256: 129 through 255.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9595
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_DIV_X128, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 128, 255, 382, 509, 636.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9605 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9606
9607
9608 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X16, batch_eq_16) {
  // One run at batch_size == 16, matching the x16 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(16).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9615
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X16, batch_div_16) {
  // Batch sizes 32, 48, ..., 144: every multiple of 16 in [32, 160).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 32;
  while (multiple < 160) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 16;
  }
}
9624
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X16, batch_lt_16) {
  // Every batch size below 16: 1 through 15.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9633
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X16, batch_gt_16) {
  // Every batch size strictly between 16 and 32: 17 through 31.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9642
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X16, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 16, 31, 46, 61, 76.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9652 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9653
9654
9655 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X32, batch_eq_32) {
  // One run at batch_size == 32, matching the x32 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(32).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9662
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X32, batch_div_32) {
  // Batch sizes 64, 96, ..., 288: every multiple of 32 in [64, 320).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 64;
  while (multiple < 320) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 32;
  }
}
9671
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X32, batch_lt_32) {
  // Every batch size below 32: 1 through 31.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9680
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X32, batch_gt_32) {
  // Every batch size strictly between 32 and 64: 33 through 63.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9689
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X32, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 32, 63, 94, 125, 156.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9699 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9700
9701
9702 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X48, batch_eq_48) {
  // One run at batch_size == 48, matching the x48 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(48).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9709
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X48, batch_div_48) {
  // Batch sizes 96, 144, ..., 432: every multiple of 48 in [96, 480).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 96;
  while (multiple < 480) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 48;
  }
}
9718
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X48, batch_lt_48) {
  // Every batch size below 48: 1 through 47.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9727
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X48, batch_gt_48) {
  // Every batch size strictly between 48 and 96: 49 through 95.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9736
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X48, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 48, 95, 142, 189, 236.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9746 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9747
9748
9749 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X64, batch_eq_64) {
  // One run at batch_size == 64, matching the x64 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(64).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9756
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X64, batch_div_64) {
  // Batch sizes 128, 192, ..., 576: every multiple of 64 in [128, 640).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 128;
  while (multiple < 640) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 64;
  }
}
9765
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X64, batch_lt_64) {
  // Every batch size below 64: 1 through 63.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9774
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X64, batch_gt_64) {
  // Every batch size strictly between 64 and 128: 65 through 127.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9783
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X64, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 64, 127, 190, 253, 316.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9793 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9794
9795
9796 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X80, batch_eq_80) {
  // One run at batch_size == 80, matching the x80 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(80).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9803
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X80, batch_div_80) {
  // Batch sizes 160, 240, ..., 720: every multiple of 80 in [160, 800).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 160;
  while (multiple < 800) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 80;
  }
}
9812
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X80, batch_lt_80) {
  // Every batch size below 80: 1 through 79.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9821
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X80, batch_gt_80) {
  // Every batch size strictly between 80 and 160: 81 through 159.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9830
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X80, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 80, 159, 238, 317, 396.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9840 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9841
9842
9843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X96, batch_eq_96) {
  // One run at batch_size == 96, matching the x96 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(96).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9850
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X96, batch_div_96) {
  // Batch sizes 192, 288, ..., 864: every multiple of 96 in [192, 960).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 192;
  while (multiple < 960) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 96;
  }
}
9859
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X96, batch_lt_96) {
  // Every batch size below 96: 1 through 95.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9868
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X96, batch_gt_96) {
  // Every batch size strictly between 96 and 192: 97 through 191.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9877
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X96, inplace) {
  // Runs with inplace(true) set on the tester at batch sizes 1, 96, 191, 286, 381, 476.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9887 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9888
9889
9890 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X112, batch_eq_112) {
  // One run at batch_size == 112, matching the x112 suffix of the kernel under test.
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester().batch_size(112).Test(
    xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
    xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9897
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X112, batch_div_112) {
  // Batch sizes 224, 336, ..., 1008: every multiple of 112 in [224, 1120).
  TEST_REQUIRES_X86_AVX512F;
  size_t multiple = 224;
  while (multiple < 1120) {
    VUnaryMicrokernelTester().batch_size(multiple).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
    multiple += 112;
  }
}
9906
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X112, batch_lt_112) {
  // Every batch size below 112: 1 through 111.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
      xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9915
// Runs the kernel for every batch size from 113 up to, but not including, 224.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9924
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 111 while the size does not exceed 560.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9934 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9935
9936
9937 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of 128.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(128);
  tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
              xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
}
9944
// Runs the kernel for batch sizes starting at 256 and advancing by 128 while
// the size stays below 1280.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 256; n < 1280; n += 128) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9953
// Runs the kernel for every batch size from 1 up to, but not including, 128.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9962
// Runs the kernel for every batch size from 129 up to, but not including, 256.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9971
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 127 while the size does not exceed 640.
TEST(F32_VSIGMOID__AVX512F_RR2_LUT32_P2_PERM2_SCALEF_NR1FMA_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128,
                xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params);
  }
}
9981 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9982
9983
9984 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 4.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
9990
// Runs the kernel for batch sizes starting at 8 and advancing by 4 while the
// size stays below 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
9998
// Runs the kernel for every batch size from 1 up to, but not including, 4.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10006
// Runs the kernel for every batch size from 5 up to, but not including, 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10014
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 3 while the size does not exceed 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10023 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10024
10025
10026 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X8, batch_eq_8) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
10032
// Runs the kernel for batch sizes starting at 16 and advancing by 8 while the
// size stays below 80.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10040
// Runs the kernel for every batch size from 1 up to, but not including, 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10048
// Runs the kernel for every batch size from 9 up to, but not including, 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10056
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 7 while the size does not exceed 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10065 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10066
10067
10068 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 12.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X12, batch_eq_12) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
10074
// Runs the kernel for batch sizes starting at 24 and advancing by 12 while the
// size stays below 120.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X12, batch_div_12) {
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10082
// Runs the kernel for every batch size from 1 up to, but not including, 12.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X12, batch_lt_12) {
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10090
// Runs the kernel for every batch size from 13 up to, but not including, 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X12, batch_gt_12) {
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10098
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 11 while the size does not exceed 60.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X12, inplace) {
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10107 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10108
10109
10110 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X16, batch_eq_16) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
10116
// Runs the kernel for batch sizes starting at 32 and advancing by 16 while the
// size stays below 160.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10124
// Runs the kernel for every batch size from 1 up to, but not including, 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10132
// Runs the kernel for every batch size from 17 up to, but not including, 32.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10140
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 15 while the size does not exceed 80.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10149 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10150
10151
10152 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X20, batch_eq_20) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
10158
// Runs the kernel for batch sizes starting at 40 and advancing by 20 while the
// size stays below 200.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X20, batch_div_20) {
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10166
// Runs the kernel for every batch size from 1 up to, but not including, 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X20, batch_lt_20) {
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10174
// Runs the kernel for every batch size from 21 up to, but not including, 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X20, batch_gt_20) {
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10182
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 19 while the size does not exceed 100.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X20, inplace) {
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10191 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10192
10193
10194 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X24, batch_eq_24) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
              xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
}
10200
// Runs the kernel for batch sizes starting at 48 and advancing by 24 while the
// size stays below 240.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10208
// Runs the kernel for every batch size from 1 up to, but not including, 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10216
// Runs the kernel for every batch size from 25 up to, but not including, 48.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10224
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 23 while the size does not exceed 120.
TEST(F32_VSIGMOID__WASMSIMD_RR2_LUT64_P2_DIV_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params);
  }
}
10233 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10234
10235
10236 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 4.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10242
// Runs the kernel for batch sizes starting at 8 and advancing by 4 while the
// size stays below 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10250
// Runs the kernel for every batch size from 1 up to, but not including, 4.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10258
// Runs the kernel for every batch size from 5 up to, but not including, 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10266
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 3 while the size does not exceed 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x4,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10275 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10276
10277
10278 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X8, batch_eq_8) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10284
// Runs the kernel for batch sizes starting at 16 and advancing by 8 while the
// size stays below 80.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10292
// Runs the kernel for every batch size from 1 up to, but not including, 8.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10300
// Runs the kernel for every batch size from 9 up to, but not including, 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10308
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 7 while the size does not exceed 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10317 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10318
10319
10320 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 12.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X12, batch_eq_12) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10326
// Runs the kernel for batch sizes starting at 24 and advancing by 12 while the
// size stays below 120.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X12, batch_div_12) {
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10334
// Runs the kernel for every batch size from 1 up to, but not including, 12.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X12, batch_lt_12) {
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10342
// Runs the kernel for every batch size from 13 up to, but not including, 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X12, batch_gt_12) {
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10350
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 11 while the size does not exceed 60.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X12, inplace) {
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10359 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10360
10361
10362 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X16, batch_eq_16) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10368
// Runs the kernel for batch sizes starting at 32 and advancing by 16 while the
// size stays below 160.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10376
// Runs the kernel for every batch size from 1 up to, but not including, 16.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10384
// Runs the kernel for every batch size from 17 up to, but not including, 32.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10392
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 15 while the size does not exceed 80.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10401 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10402
10403
10404 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X20, batch_eq_20) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10410
// Runs the kernel for batch sizes starting at 40 and advancing by 20 while the
// size stays below 200.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X20, batch_div_20) {
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10418
// Runs the kernel for every batch size from 1 up to, but not including, 20.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X20, batch_lt_20) {
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10426
// Runs the kernel for every batch size from 21 up to, but not including, 40.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X20, batch_gt_20) {
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10434
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 19 while the size does not exceed 100.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X20, inplace) {
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10443 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10444
10445
10446 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch size of 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X24, batch_eq_24) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
              xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
}
10452
// Runs the kernel for batch sizes starting at 48 and advancing by 24 while the
// size stays below 240.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10460
// Runs the kernel for every batch size from 1 up to, but not including, 24.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10468
// Runs the kernel for every batch size from 25 up to, but not including, 48.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10476
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 23 while the size does not exceed 120.
TEST(F32_VSIGMOID__WASMSIMD_RR2_P5_DIV_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24,
                xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params);
  }
}
10485 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10486
10487
// Runs the kernel once with a batch size of 1.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X1, batch_eq_1) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(1);
  tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1,
              xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
}
10493
// Runs the kernel for every batch size from 2 up to, but not including, 10.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10501
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 1 while the size does not exceed 5.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X1, inplace) {
  for (size_t n = 1; n <= 5; n += 1) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x1,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10510
10511
// Runs the kernel once with a batch size of 2.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X2, batch_eq_2) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(2);
  tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
              xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
}
10517
// Runs the kernel for batch sizes starting at 4 and advancing by 2 while the
// size stays below 20.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10525
// Runs the kernel for every batch size from 1 up to, but not including, 2.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10533
// Runs the kernel for every batch size from 3 up to, but not including, 4.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10541
// Runs the kernel with the tester's inplace option enabled, starting at batch
// size 1 and advancing by 1 while the size does not exceed 10.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X2, inplace) {
  for (size_t n = 1; n <= 10; n += 1) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10550
10551
// Runs the kernel once with a batch size of 4.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4,
              xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
}
10557
// Runs the kernel for batch sizes starting at 8 and advancing by 4 while the
// size stays below 40.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10565
// Runs the kernel for every batch size from 1 up to, but not including, 4.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4,
                xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10573
// Runs the scalar rr2_lut64_p2_div_x4 sigmoid kernel for batch sizes 5, 6
// and 7.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X4, batch_gt_4) {
  const size_t kBegin = 5;
  const size_t kEnd = 8;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10581
// Runs the scalar rr2_lut64_p2_div_x4 sigmoid kernel with inplace(true) set on
// the tester, for batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT64_P2_DIV_X4, inplace) {
  const size_t kStride = 3;
  const size_t kMax = 20;
  for (size_t n = 1; n <= kMax; n += kStride) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params);
  }
}
10590
10591
// Single run of the scalar rr2_lut2048_p1_div_x1 sigmoid kernel with a batch
// of exactly 1 element.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X1, batch_eq_1) {
  const size_t kBatchSize = 1;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
}
10597
// Runs the scalar rr2_lut2048_p1_div_x1 sigmoid kernel for batch sizes 2
// through 9 inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X1, batch_gt_1) {
  const size_t kBegin = 2;
  const size_t kEnd = 10;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10605
// Runs the scalar rr2_lut2048_p1_div_x1 sigmoid kernel with inplace(true) set
// on the tester, for batch sizes 1 through 5 inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X1, inplace) {
  const size_t kLast = 5;
  for (size_t n = 1; n <= kLast; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x1, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10614
10615
// Single run of the scalar rr2_lut2048_p1_div_x2 sigmoid kernel with a batch
// of exactly 2 elements.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X2, batch_eq_2) {
  const size_t kBatchSize = 2;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
}
10621
// Runs the scalar rr2_lut2048_p1_div_x2 sigmoid kernel for even batch sizes
// from 4 up to and including 18.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X2, batch_div_2) {
  const size_t kStep = 2;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10629
// Runs the scalar rr2_lut2048_p1_div_x2 sigmoid kernel for every batch size in
// the half-open range [1, 2), i.e. only batch size 1.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X2, batch_lt_2) {
  const size_t kEnd = 2;
  for (size_t n = 1; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10637
// Runs the scalar rr2_lut2048_p1_div_x2 sigmoid kernel for every batch size in
// the half-open range [3, 4), i.e. only batch size 3.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X2, batch_gt_2) {
  const size_t kBegin = 3;
  const size_t kEnd = 4;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10645
// Runs the scalar rr2_lut2048_p1_div_x2 sigmoid kernel with inplace(true) set
// on the tester, for batch sizes 1 through 10 inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X2, inplace) {
  const size_t kLast = 10;
  for (size_t n = 1; n <= kLast; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10654
10655
// Single run of the scalar rr2_lut2048_p1_div_x4 sigmoid kernel with a batch
// of exactly 4 elements.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X4, batch_eq_4) {
  const size_t kBatchSize = 4;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
}
10661
// Runs the scalar rr2_lut2048_p1_div_x4 sigmoid kernel for batch sizes that
// are multiples of 4, from 8 up to and including 36.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X4, batch_div_4) {
  const size_t kStep = 4;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10669
// Runs the scalar rr2_lut2048_p1_div_x4 sigmoid kernel for batch sizes 1, 2
// and 3.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X4, batch_lt_4) {
  const size_t kEnd = 4;
  for (size_t n = 1; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10677
// Runs the scalar rr2_lut2048_p1_div_x4 sigmoid kernel for batch sizes 5, 6
// and 7.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X4, batch_gt_4) {
  const size_t kBegin = 5;
  const size_t kEnd = 8;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10685
// Runs the scalar rr2_lut2048_p1_div_x4 sigmoid kernel with inplace(true) set
// on the tester, for batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VSIGMOID__SCALAR_RR2_LUT2048_P1_DIV_X4, inplace) {
  const size_t kStride = 3;
  const size_t kMax = 20;
  for (size_t n = 1; n <= kMax; n += kStride) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4, xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params);
  }
}
10694
10695
// Single run of the scalar rr2_p5_div_x1 sigmoid kernel with a batch of
// exactly 1 element.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X1, batch_eq_1) {
  const size_t kBatchSize = 1;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
}
10701
// Runs the scalar rr2_p5_div_x1 sigmoid kernel for batch sizes 2 through 9
// inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X1, batch_gt_1) {
  const size_t kBegin = 2;
  const size_t kEnd = 10;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10709
// Runs the scalar rr2_p5_div_x1 sigmoid kernel with inplace(true) set on the
// tester, for batch sizes 1 through 5 inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X1, inplace) {
  const size_t kLast = 5;
  for (size_t n = 1; n <= kLast; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x1, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10718
10719
// Single run of the scalar rr2_p5_div_x2 sigmoid kernel with a batch of
// exactly 2 elements.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X2, batch_eq_2) {
  const size_t kBatchSize = 2;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
}
10725
// Runs the scalar rr2_p5_div_x2 sigmoid kernel for even batch sizes from 4 up
// to and including 18.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X2, batch_div_2) {
  const size_t kStep = 2;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10733
// Runs the scalar rr2_p5_div_x2 sigmoid kernel for every batch size in the
// half-open range [1, 2), i.e. only batch size 1.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X2, batch_lt_2) {
  const size_t kEnd = 2;
  for (size_t n = 1; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10741
// Runs the scalar rr2_p5_div_x2 sigmoid kernel for every batch size in the
// half-open range [3, 4), i.e. only batch size 3.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X2, batch_gt_2) {
  const size_t kBegin = 3;
  const size_t kEnd = 4;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10749
// Runs the scalar rr2_p5_div_x2 sigmoid kernel with inplace(true) set on the
// tester, for batch sizes 1 through 10 inclusive.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X2, inplace) {
  const size_t kLast = 10;
  for (size_t n = 1; n <= kLast; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x2, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10758
10759
// Single run of the scalar rr2_p5_div_x4 sigmoid kernel with a batch of
// exactly 4 elements.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X4, batch_eq_4) {
  const size_t kBatchSize = 4;
  VUnaryMicrokernelTester()
    .batch_size(kBatchSize)
    .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
}
10765
// Runs the scalar rr2_p5_div_x4 sigmoid kernel for batch sizes that are
// multiples of 4, from 8 up to and including 36.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X4, batch_div_4) {
  const size_t kStep = 4;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10773
// Runs the scalar rr2_p5_div_x4 sigmoid kernel for batch sizes 1, 2 and 3.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X4, batch_lt_4) {
  const size_t kEnd = 4;
  for (size_t n = 1; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10781
// Runs the scalar rr2_p5_div_x4 sigmoid kernel for batch sizes 5, 6 and 7.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X4, batch_gt_4) {
  const size_t kBegin = 5;
  const size_t kEnd = 8;
  for (size_t n = kBegin; n != kEnd; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10789
// Runs the scalar rr2_p5_div_x4 sigmoid kernel with inplace(true) set on the
// tester, for batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VSIGMOID__SCALAR_RR2_P5_DIV_X4, inplace) {
  const size_t kStride = 3;
  const size_t kMax = 20;
  for (size_t n = 1; n <= kMax; n += kStride) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4, xnn_init_f32_sigmoid_scalar_rr2_p5_params);
  }
}
10798