// xref: /aosp_15_r20/external/XNNPACK/eval/f32-roundz.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>
23 
24 
// Number of float elements handed to a kernel per invocation; also the size
// of the input/output buffers allocated by the tests in this file.
constexpr int kBlockSize = 1024;
26 
27 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x00000000, 0x4B800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is +0.0f up to (excluding) 2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
45 
TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x80000000, 0xCB800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -0.0f down to (excluding) -2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
63 
TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x4B800000, 0x7F800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is 2^24 up to (excluding) +infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
81 
TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0xCB800000, 0xFF800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -2^24 down to (excluding) -infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
99 
TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
  // Runs one block filled with +infinity through the kernel and requires each
  // result to equal std::trunc of the input when compared via float_as_uint32.
  // All kBlockSize outputs are verified (not only the first), so an error that
  // affects only some positions of the block is still detected.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  // The first argument is the block size in bytes (element count * sizeof(float)).
  xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  for (uint32_t i = 0; i < kBlockSize; i++) {
    const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
111 
TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
  // Runs one block filled with -infinity through the kernel and requires each
  // result to equal std::trunc of the input when compared via float_as_uint32.
  // All kBlockSize outputs are verified (not only the first), so an error that
  // affects only some positions of the block is still detected.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  // The first argument is the block size in bytes (element count * sizeof(float)).
  xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  for (uint32_t i = 0; i < kBlockSize; i++) {
    const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
123 
TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x7FC00000, 0x80000000), kBlockSize patterns per kernel call.
  // NOTE(review): in IEEE 754 binary32 these patterns are the positive NaNs
  // with bit 22 (the quiet bit) set -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
141 
TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
  // Exhaustively checks the kernel against std::trunc for every bit pattern in
  // [0x7FC00000, 0x80000000) with bit 31 (0x80000000) additionally set,
  // kBlockSize patterns per kernel call.
  // NOTE(review): in IEEE 754 binary32 these patterns are the negative NaNs
  // with bit 22 (the quiet bit) set -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    // The loop walks the positive range; OR-ing in 0x80000000 sets the top bit of each pattern.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
159 
TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
  // Checks the kernel against std::trunc for the input bit patterns in
  // [0x7F800001, 0x7FC00000), kBlockSize patterns per kernel call. Both sides
  // of the comparison are masked with 0xFFBFFFFF, so bit 22 is ignored.
  // NOTE(review): in IEEE 754 binary32 these are the positive NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001; all other patterns are unchanged.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      // Bit 22 is cleared on both sides, so results that differ only in that bit still compare equal.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
177 
TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
  // Checks the kernel against std::trunc for the bit patterns in
  // [0x7F800001, 0x7FC00000) with bit 31 (0x80000000) additionally set,
  // kBlockSize patterns per kernel call. Both sides of the comparison are
  // masked with 0xFFBFFFFF, so bit 22 is ignored.
  // NOTE(review): in IEEE 754 binary32 these are the negative NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001 before the top bit is OR-ed in.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      // Bit 22 is cleared on both sides, so results that differ only in that bit still compare equal.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
195 
TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
  // Feeds the input bit patterns in [0x7F800001, 0x7FC00000) to the kernel,
  // kBlockSize per call, and compares against std::trunc with no mask: every
  // bit of the result, including bit 22, has to match the reference.
  // NOTE(review): in IEEE 754 binary32 these are the positive NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001; all other patterns are unchanged.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
213 
TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
  // Feeds the bit patterns in [0x7F800001, 0x7FC00000), each with bit 31
  // (0x80000000) additionally set, to the kernel, kBlockSize per call, and
  // compares against std::trunc with no mask: every bit of the result,
  // including bit 22, has to match the reference.
  // NOTE(review): in IEEE 754 binary32 these are the negative NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001 before the top bit is OR-ed in.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
231 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
232 
233 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE2_CVT, positive_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x00000000, 0x4B800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is +0.0f up to (excluding) 2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
251 
TEST(ROUNDZ__SSE2_CVT, negative_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x80000000, 0xCB800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -0.0f down to (excluding) -2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
269 
TEST(ROUNDZ__SSE2_CVT, positive_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x4B800000, 0x7F800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is 2^24 up to (excluding) +infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
287 
TEST(ROUNDZ__SSE2_CVT, negative_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0xCB800000, 0xFF800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -2^24 down to (excluding) -infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
305 
TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
  // Runs one block filled with +infinity through the kernel and requires each
  // result to equal std::trunc of the input when compared via float_as_uint32.
  // All kBlockSize outputs are verified (not only the first), so an error that
  // affects only some positions of the block is still detected.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  // The first argument is the block size in bytes (element count * sizeof(float)).
  xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  for (uint32_t i = 0; i < kBlockSize; i++) {
    const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
317 
TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
  // Runs one block filled with -infinity through the kernel and requires each
  // result to equal std::trunc of the input when compared via float_as_uint32.
  // All kBlockSize outputs are verified (not only the first), so an error that
  // affects only some positions of the block is still detected.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  // The first argument is the block size in bytes (element count * sizeof(float)).
  xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  for (uint32_t i = 0; i < kBlockSize; i++) {
    const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
329 
TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x7FC00000, 0x80000000), kBlockSize patterns per kernel call.
  // NOTE(review): in IEEE 754 binary32 these patterns are the positive NaNs
  // with bit 22 (the quiet bit) set -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
347 
TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
  // Exhaustively checks the kernel against std::trunc for every bit pattern in
  // [0x7FC00000, 0x80000000) with bit 31 (0x80000000) additionally set,
  // kBlockSize patterns per kernel call.
  // NOTE(review): in IEEE 754 binary32 these patterns are the negative NaNs
  // with bit 22 (the quiet bit) set -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    // The loop walks the positive range; OR-ing in 0x80000000 sets the top bit of each pattern.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
365 
TEST(ROUNDZ__SSE2_CVT, positive_snan) {
  // Checks the kernel against std::trunc for the input bit patterns in
  // [0x7F800001, 0x7FC00000), kBlockSize patterns per kernel call. Both sides
  // of the comparison are masked with 0xFFBFFFFF, so bit 22 is ignored.
  // NOTE(review): in IEEE 754 binary32 these are the positive NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001; all other patterns are unchanged.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      // Bit 22 is cleared on both sides, so results that differ only in that bit still compare equal.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
383 
TEST(ROUNDZ__SSE2_CVT, negative_snan) {
  // Checks the kernel against std::trunc for the bit patterns in
  // [0x7F800001, 0x7FC00000) with bit 31 (0x80000000) additionally set,
  // kBlockSize patterns per kernel call. Both sides of the comparison are
  // masked with 0xFFBFFFFF, so bit 22 is ignored.
  // NOTE(review): in IEEE 754 binary32 these are the negative NaNs with the
  // quiet bit (bit 22) clear -- assumes uint32_as_float is a bit cast.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001 before the top bit is OR-ed in.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      // Bit 22 is cleared on both sides, so results that differ only in that bit still compare equal.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
401 
TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
  // Feeds the input bit patterns in [0x7F800001, 0x7FC00000) to the kernel,
  // kBlockSize per call, and compares against std::trunc with no mask: every
  // bit of the result, including bit 22, has to match the reference.
  // The DISABLED_ name prefix makes GoogleTest skip this test unless disabled
  // tests are explicitly requested.
  // NOTE(review): the reason for disabling is not recorded here.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001; all other patterns are unchanged.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
419 
TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
  // Feeds the bit patterns in [0x7F800001, 0x7FC00000), each with bit 31
  // (0x80000000) additionally set, to the kernel, kBlockSize per call, and
  // compares against std::trunc with no mask: every bit of the result,
  // including bit 22, has to match the reference.
  // The DISABLED_ name prefix makes GoogleTest skip this test unless disabled
  // tests are explicitly requested.
  // NOTE(review): the reason for disabling is not recorded here.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    // std::max raises the very first pattern, 0x7F800000, to 0x7F800001 before the top bit is OR-ed in.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
437 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
438 
439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE41, positive_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x00000000, 0x4B800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is +0.0f up to (excluding) 2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
457 
TEST(ROUNDZ__SSE41, negative_normal) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x80000000, 0xCB800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -0.0f down to (excluding) -2^24 -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
475 
TEST(ROUNDZ__SSE41, positive_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0x4B800000, 0x7F800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is 2^24 up to (excluding) +infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
493 
TEST(ROUNDZ__SSE41, negative_integral) {
  // Exhaustively checks the kernel against std::trunc for every input bit
  // pattern in [0xCB800000, 0xFF800000), kBlockSize patterns per kernel call.
  // NOTE(review): assuming uint32_as_float yields the IEEE 754 binary32 value
  // with those bits, this is -2^24 down to (excluding) -infinity -- confirm.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at n.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    // The first argument is the block size in bytes (element count * sizeof(float)).
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Results are compared through float_as_uint32, i.e. as 32-bit integers rather than as floats.
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
511 
// Feeds a block of kBlockSize copies of +infinity to xnn_math_f32_roundz__sse41
// and requires every output element to be bit-identical to std::trunc(+infinity).
TEST(ROUNDZ__SSE41, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
523 
// Feeds a block of kBlockSize copies of -infinity to xnn_math_f32_roundz__sse41
// and requires every output element to be bit-identical to std::trunc(-infinity).
TEST(ROUNDZ__SSE41, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
535 
// Runs xnn_math_f32_roundz__sse41 over every 32-bit pattern in
// [0x7FC00000, 0x80000000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__SSE41, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
553 
// Runs xnn_math_f32_roundz__sse41 over every 32-bit pattern in
// [0x7FC00000, 0x80000000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and requires each output to be bit-identical to
// std::trunc of the same input.
TEST(ROUNDZ__SSE41, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, each with the sign bit forced on.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | bits++);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
571 
// Runs xnn_math_f32_roundz__sse41 over the 32-bit patterns in
// [0x7F800000, 0x7FC00000), kBlockSize patterns per call, and compares each
// output with std::trunc of the same input while ignoring bit 22.
TEST(ROUNDZ__SSE41, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001 so
    // that the pattern 0x7F800000 itself is never used as an input.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Both sides are masked with 0xFFBFFFFF, so bit 22 (0x00400000) may differ.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
589 
// Runs xnn_math_f32_roundz__sse41 over the 32-bit patterns in
// [0x7F800000, 0x7FC00000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and compares each output with std::trunc of the same
// input while ignoring bit 22.
TEST(ROUNDZ__SSE41, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001
    // (so 0x7F800000 itself is never used) and then given the sign bit.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Both sides are masked with 0xFFBFFFFF, so bit 22 (0x00400000) may differ.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
607 
// Runs xnn_math_f32_roundz__sse41 over the 32-bit patterns in
// [0x7F800000, 0x7FC00000), kBlockSize patterns per call, and requires each
// output to be bit-identical (no bits masked) to std::trunc of the same input.
TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001 so
    // that the pattern 0x7F800000 itself is never used as an input.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Full 32-bit comparison against the reference.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
625 
// Runs xnn_math_f32_roundz__sse41 over the 32-bit patterns in
// [0x7F800000, 0x7FC00000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and requires each output to be bit-identical (no bits
// masked) to std::trunc of the same input.
TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001
    // (so 0x7F800000 itself is never used) and then given the sign bit.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Full 32-bit comparison against the reference.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
643 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
644 
645 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0x00000000, 0x4B800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
663 
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0x80000000, 0xCB800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
681 
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0x4B800000, 0x7F800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
699 
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0xCB800000, 0xFF800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
717 
// Feeds a block of kBlockSize copies of +infinity to
// xnn_math_f32_roundz__neon_addsub and requires every output element to be
// bit-identical to std::trunc(+infinity).
TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
729 
// Feeds a block of kBlockSize copies of -infinity to
// xnn_math_f32_roundz__neon_addsub and requires every output element to be
// bit-identical to std::trunc(-infinity).
TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
741 
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0x7FC00000, 0x80000000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
759 
// Runs xnn_math_f32_roundz__neon_addsub over every 32-bit pattern in
// [0x7FC00000, 0x80000000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and requires each output to be bit-identical to
// std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, each with the sign bit forced on.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | bits++);
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
777 
// Runs xnn_math_f32_roundz__neon_addsub over the 32-bit patterns in
// [0x7F800000, 0x7FC00000), kBlockSize patterns per call, and compares each
// output with std::trunc of the same input while ignoring bit 22.
TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001 so
    // that the pattern 0x7F800000 itself is never used as an input.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Both sides are masked with 0xFFBFFFFF, so bit 22 (0x00400000) may differ.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
795 
// Runs xnn_math_f32_roundz__neon_addsub over the 32-bit patterns in
// [0x7F800000, 0x7FC00000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and compares each output with std::trunc of the same
// input while ignoring bit 22.
TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001
    // (so 0x7F800000 itself is never used) and then given the sign bit.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Both sides are masked with 0xFFBFFFFF, so bit 22 (0x00400000) may differ.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
813 
// Runs xnn_math_f32_roundz__neon_addsub over the 32-bit patterns in
// [0x7F800000, 0x7FC00000), kBlockSize patterns per call, and requires each
// output to be bit-identical (no bits masked) to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001 so
    // that the pattern 0x7F800000 itself is never used as an input.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Full 32-bit comparison against the reference.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
831 
// Runs xnn_math_f32_roundz__neon_addsub over the 32-bit patterns in
// [0x7F800000, 0x7FC00000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and requires each output to be bit-identical (no bits
// masked) to std::trunc of the same input.
TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001
    // (so 0x7F800000 itself is never used) and then given the sign bit.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Full 32-bit comparison against the reference.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
849 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
850 
851 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0x00000000, 0x4B800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
869 
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0x80000000, 0xCB800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
887 
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0x4B800000, 0x7F800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
905 
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0xCB800000, 0xFF800000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
923 
// Feeds a block of kBlockSize copies of +infinity to
// xnn_math_f32_roundz__neon_cvt and requires every output element to be
// bit-identical to std::trunc(+infinity).
TEST(ROUNDZ__NEON_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
935 
// Feeds a block of kBlockSize copies of -infinity to
// xnn_math_f32_roundz__neon_cvt and requires every output element to be
// bit-identical to std::trunc(-infinity).
TEST(ROUNDZ__NEON_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is the same value, so a single reference covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  // Check all elements, not just the first: the kernel writes kBlockSize outputs
  // and a defect confined to later elements would otherwise go unnoticed.
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
947 
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0x7FC00000, 0x80000000), kBlockSize patterns per call, and requires each
// output to be bit-identical to std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Load the block with consecutive bit patterns beginning at block_start.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
965 
// Runs xnn_math_f32_roundz__neon_cvt over every 32-bit pattern in
// [0x7FC00000, 0x80000000) with the top bit (0x80000000) OR-ed in, kBlockSize
// patterns per call, and requires each output to be bit-identical to
// std::trunc of the same input.
TEST(ROUNDZ__NEON_CVT, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, each with the sign bit forced on.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | bits++);
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare raw bits so that the match must be exact.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
983 
// Runs xnn_math_f32_roundz__neon_cvt over the 32-bit patterns in
// [0x7F800000, 0x7FC00000), kBlockSize patterns per call, and compares each
// output with std::trunc of the same input while ignoring bit 22.
TEST(ROUNDZ__NEON_CVT, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    // Consecutive patterns from block_start, clamped to at least 0x7F800001 so
    // that the pattern 0x7F800000 itself is never used as an input.
    uint32_t bits = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Both sides are masked with 0xFFBFFFFF, so bit 22 (0x00400000) may differ.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1001 
// Sweeps the negative signaling-NaN encodings (sign bit set on
// 0x7F800001 .. 0x7FBFFFFF) and compares xnn_math_f32_roundz__neon_cvt with
// std::trunc while ignoring bit 22 (mask 0xFFBFFFFF), the top mantissa bit.
TEST(ROUNDZ__NEON_CVT, negative_snan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neon_cvt(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1019 
// Same input sweep as the positive signaling-NaN test, but the result must
// match std::trunc on every bit (no mask). The DISABLED_ prefix keeps
// GoogleTest from running it by default.
// NOTE(review): presumably disabled because this kernel does not quiet
// signaling NaNs the way std::trunc does -- confirm.
TEST(ROUNDZ__NEON_CVT, DISABLED_positive_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 so that 0x7F800000 (infinity) is never generated.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neon_cvt(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1037 
// Same input sweep as the negative signaling-NaN test, but the result must
// match std::trunc on every bit (no mask). The DISABLED_ prefix keeps
// GoogleTest from running it by default.
// NOTE(review): presumably disabled because this kernel does not quiet
// signaling NaNs the way std::trunc does -- confirm.
TEST(ROUNDZ__NEON_CVT, DISABLED_negative_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neon_cvt(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1055 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1056 
1057 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Sweeps every bit pattern in [0x00000000, 0x4B800000) -- non-negative floats
// below 2^24 -- and requires xnn_math_f32_roundz__neonv8 to match std::trunc
// bit-for-bit.
TEST(ROUNDZ__NEONV8, positive_normal) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x00000000);
  while (block_start < UINT32_C(0x4B800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1075 
// Sweeps every bit pattern in [0x80000000, 0xCB800000) -- sign-bit-set floats
// with magnitude below 2^24 -- and requires xnn_math_f32_roundz__neonv8 to
// match std::trunc bit-for-bit.
TEST(ROUNDZ__NEONV8, negative_normal) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x80000000);
  while (block_start < UINT32_C(0xCB800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1093 
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) -- finite positive
// floats of at least 2^24, all of which are already integer-valued -- and
// requires xnn_math_f32_roundz__neonv8 to match std::trunc bit-for-bit.
TEST(ROUNDZ__NEONV8, positive_integral) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x4B800000);
  while (block_start < UINT32_C(0x7F800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1111 
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) -- finite negative
// floats with magnitude of at least 2^24, all already integer-valued -- and
// requires xnn_math_f32_roundz__neonv8 to match std::trunc bit-for-bit.
TEST(ROUNDZ__NEONV8, negative_integral) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0xCB800000);
  while (block_start < UINT32_C(0xFF800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1129 
// Fills a whole block with +infinity, runs xnn_math_f32_roundz__neonv8 over
// it, and requires every output element to match std::trunc bit-for-bit.
// Previously only outputs[0] was asserted even though the kernel was asked to
// process kBlockSize elements, so the remaining elements went unchecked.
TEST(ROUNDZ__NEONV8, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1141 
// Fills a whole block with -infinity, runs xnn_math_f32_roundz__neonv8 over
// it, and requires every output element to match std::trunc bit-for-bit.
// Previously only outputs[0] was asserted even though the kernel was asked to
// process kBlockSize elements, so the remaining elements went unchecked.
TEST(ROUNDZ__NEONV8, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1153 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) (positive quiet NaNs)
// and requires xnn_math_f32_roundz__neonv8 to match std::trunc bit-for-bit.
TEST(ROUNDZ__NEONV8, positive_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7FC00000);
  while (block_start < UINT32_C(0x80000000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1171 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) with the sign bit forced
// on (negative quiet NaNs) and requires xnn_math_f32_roundz__neonv8 to match
// std::trunc bit-for-bit.
TEST(ROUNDZ__NEONV8, negative_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7FC00000);
  while (block_start < UINT32_C(0x80000000)) {
    // Load the next kBlockSize consecutive patterns, each with the sign bit set.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | (block_start + lane));
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1189 
// Sweeps the positive signaling-NaN encodings (0x7F800001 .. 0x7FBFFFFF) and
// compares xnn_math_f32_roundz__neonv8 with std::trunc while ignoring bit 22
// (mask 0xFFBFFFFF), i.e. the top mantissa bit that separates quiet from
// signaling NaNs.
TEST(ROUNDZ__NEONV8, positive_snan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 so that 0x7F800000 (infinity) is never generated.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1207 
// Sweeps the negative signaling-NaN encodings (sign bit set on
// 0x7F800001 .. 0x7FBFFFFF) and compares xnn_math_f32_roundz__neonv8 with
// std::trunc while ignoring bit 22 (mask 0xFFBFFFFF), the top mantissa bit.
TEST(ROUNDZ__NEONV8, negative_snan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1225 
// Sweeps the positive signaling-NaN encodings (0x7F800001 .. 0x7FBFFFFF) and
// requires xnn_math_f32_roundz__neonv8 to match std::trunc on every bit, with
// no mask applied to bit 22.
TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 so that 0x7F800000 (infinity) is never generated.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1243 
// Sweeps the negative signaling-NaN encodings (sign bit set on
// 0x7F800001 .. 0x7FBFFFFF) and requires xnn_math_f32_roundz__neonv8 to match
// std::trunc on every bit, with no mask applied to bit 22.
TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__neonv8(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1261 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1262 
1263 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Sweeps every bit pattern in [0x00000000, 0x4B800000) -- non-negative floats
// below 2^24 -- and requires xnn_math_f32_roundz__wasmsimd_addsub to match
// std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_normal) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x00000000);
  while (block_start < UINT32_C(0x4B800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1281 
// Sweeps every bit pattern in [0x80000000, 0xCB800000) -- sign-bit-set floats
// with magnitude below 2^24 -- and requires
// xnn_math_f32_roundz__wasmsimd_addsub to match std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_normal) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x80000000);
  while (block_start < UINT32_C(0xCB800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1299 
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) -- finite positive
// floats of at least 2^24, all of which are already integer-valued -- and
// requires xnn_math_f32_roundz__wasmsimd_addsub to match std::trunc
// bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_integral) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x4B800000);
  while (block_start < UINT32_C(0x7F800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1317 
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) -- finite negative
// floats with magnitude of at least 2^24, all already integer-valued -- and
// requires xnn_math_f32_roundz__wasmsimd_addsub to match std::trunc
// bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_integral) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0xCB800000);
  while (block_start < UINT32_C(0xFF800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1335 
// Fills a whole block with +infinity, runs
// xnn_math_f32_roundz__wasmsimd_addsub over it, and requires every output
// element to match std::trunc bit-for-bit. Previously only outputs[0] was
// asserted even though the kernel was asked to process kBlockSize elements,
// so the remaining elements went unchecked.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1347 
// Fills a whole block with -infinity, runs
// xnn_math_f32_roundz__wasmsimd_addsub over it, and requires every output
// element to match std::trunc bit-for-bit. Previously only outputs[0] was
// asserted even though the kernel was asked to process kBlockSize elements,
// so the remaining elements went unchecked.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // Every input is identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
1359 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) (positive quiet NaNs)
// and requires xnn_math_f32_roundz__wasmsimd_addsub to match std::trunc
// bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7FC00000);
  while (block_start < UINT32_C(0x80000000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1377 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) with the sign bit forced
// on (negative quiet NaNs) and requires xnn_math_f32_roundz__wasmsimd_addsub
// to match std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7FC00000);
  while (block_start < UINT32_C(0x80000000)) {
    // Load the next kBlockSize consecutive patterns, each with the sign bit set.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | (block_start + lane));
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1395 
// Sweeps the positive signaling-NaN encodings (0x7F800001 .. 0x7FBFFFFF) and
// compares xnn_math_f32_roundz__wasmsimd_addsub with std::trunc while ignoring
// bit 22 (mask 0xFFBFFFFF), i.e. the top mantissa bit that separates quiet
// from signaling NaNs.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 so that 0x7F800000 (infinity) is never generated.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1413 
// Sweeps the negative signaling-NaN encodings (sign bit set on
// 0x7F800001 .. 0x7FBFFFFF) and compares xnn_math_f32_roundz__wasmsimd_addsub
// with std::trunc while ignoring bit 22 (mask 0xFFBFFFFF), the top mantissa
// bit.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1431 
// Sweeps the positive signaling-NaN encodings (0x7F800001 .. 0x7FBFFFFF) and
// requires xnn_math_f32_roundz__wasmsimd_addsub to match std::trunc on every
// bit, with no mask applied to bit 22.
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 so that 0x7F800000 (infinity) is never generated.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1449 
// Sweeps the negative signaling-NaN encodings (sign bit set on
// 0x7F800001 .. 0x7FBFFFFF) and requires xnn_math_f32_roundz__wasmsimd_addsub
// to match std::trunc on every bit, with no mask applied to bit 22.
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x7F800000);
  while (block_start < UINT32_C(0x7FC00000)) {
    // Clamp to 0x7F800001 to skip infinity, then force the sign bit on.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + lane, UINT32_C(0x7F800001)));
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_addsub(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1467 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1468 
1469 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Sweeps every bit pattern in [0x00000000, 0x4B800000) -- non-negative floats
// below 2^24 -- and requires xnn_math_f32_roundz__wasmsimd_cvt to match
// std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_CVT, positive_normal) {
  using AlignedFloats = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloats inputs(kBlockSize);
  AlignedFloats outputs(kBlockSize);
  const size_t block_bytes = kBlockSize * sizeof(float);
  uint32_t block_start = UINT32_C(0x00000000);
  while (block_start < UINT32_C(0x4B800000)) {
    // Load the next kBlockSize consecutive bit patterns.
    uint32_t lane = 0;
    for (float& slot : inputs) {
      slot = uint32_as_float(block_start + lane);
      lane++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(block_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
    block_start += kBlockSize;
  }
}
1487 
// Runs every bit pattern in [0x80000000, 0xCB800000) through the wasmsimd_cvt kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_CVT, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80000000); block_base < UINT32_C(0xCB800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1505 
// Runs every bit pattern in [0x4B800000, 0x7F800000) through the wasmsimd_cvt kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_CVT, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x4B800000); block_base < UINT32_C(0x7F800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1523 
// Runs every bit pattern in [0xCB800000, 0xFF800000) through the wasmsimd_cvt kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_CVT, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0xCB800000); block_base < UINT32_C(0xFF800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1541 
// Feeds a block of kBlockSize +infinity values to the wasmsimd_cvt kernel and checks
// that the first output matches std::trunc bit-for-bit. Only element 0 is compared.
TEST(ROUNDZ__WASMSIMD_CVT, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  // Every input element holds +infinity.
  AlignedFloatVector inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1553 
// Feeds a block of kBlockSize -infinity values to the wasmsimd_cvt kernel and checks
// that the first output matches std::trunc bit-for-bit. Only element 0 is compared.
TEST(ROUNDZ__WASMSIMD_CVT, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  // Every input element holds -infinity.
  AlignedFloatVector inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1565 
// Runs every bit pattern in [0x7FC00000, 0x80000000) through the wasmsimd_cvt kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_CVT, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1583 
// Takes every bit pattern in [0x7FC00000, 0x80000000), sets the sign bit, and runs the
// result through the wasmsimd_cvt kernel; outputs must match std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_CVT, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1601 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001,
// through the wasmsimd_cvt kernel. Outputs are compared with std::trunc after masking
// both sides with 0xFFBFFFFF, i.e. bit 22 is ignored.
TEST(ROUNDZ__WASMSIMD_CVT, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input, ignoring bit 22.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1619 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 and
// with the sign bit set, through the wasmsimd_cvt kernel. Outputs are compared with
// std::trunc after masking both sides with 0xFFBFFFFF, i.e. bit 22 is ignored.
TEST(ROUNDZ__WASMSIMD_CVT, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input, ignoring bit 22.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1637 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001,
// through the wasmsimd_cvt kernel. The complete output bit pattern (including bit 22)
// must equal that of std::trunc. The DISABLED_ name prefix makes GoogleTest skip this
// test unless disabled tests are explicitly requested.
TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1655 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 and
// with the sign bit set, through the wasmsimd_cvt kernel. The complete output bit
// pattern (including bit 22) must equal that of std::trunc. The DISABLED_ name prefix
// makes GoogleTest skip this test unless disabled tests are explicitly requested.
TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1673 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1674 
1675 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs every bit pattern in [0x00000000, 0x4B800000) through the wasmsimd_native kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00000000); block_base < UINT32_C(0x4B800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1693 
// Runs every bit pattern in [0x80000000, 0xCB800000) through the wasmsimd_native kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80000000); block_base < UINT32_C(0xCB800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1711 
// Runs every bit pattern in [0x4B800000, 0x7F800000) through the wasmsimd_native kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x4B800000); block_base < UINT32_C(0x7F800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1729 
// Runs every bit pattern in [0xCB800000, 0xFF800000) through the wasmsimd_native kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0xCB800000); block_base < UINT32_C(0xFF800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1747 
// Feeds a block of kBlockSize +infinity values to the wasmsimd_native kernel and checks
// that the first output matches std::trunc bit-for-bit. Only element 0 is compared.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  // Every input element holds +infinity.
  AlignedFloatVector inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1759 
// Feeds a block of kBlockSize -infinity values to the wasmsimd_native kernel and checks
// that the first output matches std::trunc bit-for-bit. Only element 0 is compared.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  // Every input element holds -infinity.
  AlignedFloatVector inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1771 
// Runs every bit pattern in [0x7FC00000, 0x80000000) through the wasmsimd_native kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1789 
// Takes every bit pattern in [0x7FC00000, 0x80000000), sets the sign bit, and runs the
// result through the wasmsimd_native kernel; outputs must match std::trunc bit-for-bit.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7FC00000); block_base < UINT32_C(0x80000000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | bits);
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1807 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001,
// through the wasmsimd_native kernel. Outputs are compared with std::trunc after masking
// both sides with 0xFFBFFFFF, i.e. bit 22 is ignored.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input, ignoring bit 22.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1825 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 and
// with the sign bit set, through the wasmsimd_native kernel. Outputs are compared with
// std::trunc after masking both sides with 0xFFBFFFFF, i.e. bit 22 is ignored.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input, ignoring bit 22.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1843 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001,
// through the wasmsimd_native kernel. The complete output bit pattern (including bit 22)
// must equal that of std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1861 
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 and
// with the sign bit set, through the wasmsimd_native kernel. The complete output bit
// pattern (including bit 22) must equal that of std::trunc.
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x7F800000); block_base < UINT32_C(0x7FC00000); block_base += kBlockSize) {
    // Fill the block from kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits, UINT32_C(0x7F800001)));
      bits++;
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1879 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1880 
// Runs every bit pattern in [0x00000000, 0x4B800000) through the scalar_addsub kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x00000000); block_base < UINT32_C(0x4B800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1898 
// Runs every bit pattern in [0x80000000, 0xCB800000) through the scalar_addsub kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x80000000); block_base < UINT32_C(0xCB800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1916 
// Runs every bit pattern in [0x4B800000, 0x7F800000) through the scalar_addsub kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0x4B800000); block_base < UINT32_C(0x7F800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1934 
// Runs every bit pattern in [0xCB800000, 0xFF800000) through the scalar_addsub kernel,
// kBlockSize values at a time, and requires bit-exact agreement with std::trunc.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  for (uint32_t block_base = UINT32_C(0xCB800000); block_base < UINT32_C(0xFF800000); block_base += kBlockSize) {
    // Fill the block with kBlockSize consecutive bit patterns starting at block_base.
    uint32_t bits = block_base;
    for (float& input : inputs) {
      input = uint32_as_float(bits);
      bits++;
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    // Compare every output against std::trunc of the matching input.
    for (size_t i = 0; i < outputs.size(); ++i) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1952 
// Feeds a block of kBlockSize +infinity values to the scalar_addsub kernel and checks
// that the first output matches std::trunc bit-for-bit. Only element 0 is compared.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  // Every input element holds +infinity.
  AlignedFloatVector inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1964 
// Feeds a block made entirely of -infinity to the scalar add/sub kernel and
// compares the first output, bit for bit, with std::trunc of the first input.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
  const float fill_value = -std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, fill_value);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 is verified; every input in the block holds the same value.
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1976 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) -- sign bit clear, all
// exponent bits and the top mantissa bit set -- through the scalar add/sub
// kernel and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1994 
// Same sweep as the positive quiet-NaN test, but with the sign bit (0x80000000)
// OR-ed into every pattern of [0x7FC00000, 0x80000000) before it is handed to
// the scalar add/sub kernel. Results must match std::trunc bit for bit.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, each with the sign bit forced on.
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | magnitude++);
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2012 
// Sweeps the bit patterns in [0x7F800000, 0x7FC00000) through the scalar
// add/sub kernel, clamping each pattern to at least 0x7F800001 so that the
// +infinity encoding (0x7F800000) is never used as an input. Reference and
// kernel outputs are compared with bit 22 (0x00400000) masked out.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      // The clamp replaces 0x7F800000 (infinity) with the first NaN pattern.
      input = uint32_as_float(std::max<uint32_t>(pattern++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2030 
// Sign-bit-set counterpart of the positive signaling-NaN sweep: patterns from
// [0x7F800000, 0x7FC00000) are clamped to at least 0x7F800001 and then OR-ed
// with 0x80000000 before reaching the scalar add/sub kernel. Reference and
// kernel outputs are compared with bit 22 (0x00400000) masked out.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      // The clamp keeps the infinity encoding out; the OR forces the sign bit on.
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(magnitude++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2048 
// Uses the same inputs as the positive signaling-NaN sweep (patterns from
// [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001) but compares the
// scalar add/sub kernel output with std::trunc on all 32 bits, with no masking.
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      // The clamp replaces 0x7F800000 (infinity) with the first NaN pattern.
      input = uint32_as_float(std::max<uint32_t>(pattern++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2066 
// Uses the same inputs as the negative signaling-NaN sweep (patterns from
// [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001, sign bit set) but
// compares the scalar add/sub kernel output with std::trunc on all 32 bits.
TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      // The clamp keeps the infinity encoding out; the OR forces the sign bit on.
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(magnitude++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2084 
// Sweeps every bit pattern in [0x00000000, 0x4B800000), i.e. from +0.0 up to
// (but excluding) 2^24, through the scalar conversion-based kernel in blocks of
// kBlockSize and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2102 
// Sweeps every bit pattern in [0x80000000, 0xCB800000), i.e. from -0.0 up to
// (but excluding) -2^24, through the scalar conversion-based kernel in blocks
// of kBlockSize and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2120 
// Sweeps every bit pattern in [0x4B800000, 0x7F800000), i.e. from 2^24 up to
// (but excluding) positive infinity, through the scalar conversion-based kernel
// in blocks of kBlockSize and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2138 
// Sweeps every bit pattern in [0xCB800000, 0xFF800000), i.e. from -2^24 up to
// (but excluding) negative infinity, through the scalar conversion-based kernel
// in blocks of kBlockSize and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2156 
// Feeds a block made entirely of +infinity to the scalar conversion-based
// kernel and compares the first output, bit for bit, with std::trunc of the
// first input.
TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
  const float fill_value = +std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, fill_value);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 is verified; every input in the block holds the same value.
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2168 
// Feeds a block made entirely of -infinity to the scalar conversion-based
// kernel and compares the first output, bit for bit, with std::trunc of the
// first input.
TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
  const float fill_value = -std::numeric_limits<float>::infinity();
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, fill_value);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);

  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

  // Only element 0 is verified; every input in the block holds the same value.
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2180 
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) -- sign bit clear, all
// exponent bits and the top mantissa bit set -- through the scalar
// conversion-based kernel and requires a bit-exact match with std::trunc.
TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Populate the block with consecutive bit patterns beginning at block_start.
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(pattern++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2198 
// Same sweep as the positive quiet-NaN test, but with the sign bit (0x80000000)
// OR-ed into every pattern of [0x7FC00000, 0x80000000) before it is handed to
// the scalar conversion-based kernel. Results must match std::trunc bit for bit.
TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, each with the sign bit forced on.
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      input = uint32_as_float(UINT32_C(0x80000000) | magnitude++);
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2216 
// Sweeps the bit patterns in [0x7F800000, 0x7FC00000) through the scalar
// conversion-based kernel, clamping each pattern to at least 0x7F800001 so that
// the +infinity encoding (0x7F800000) is never used as an input. Reference and
// kernel outputs are compared with bit 22 (0x00400000) masked out.
TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      // The clamp replaces 0x7F800000 (infinity) with the first NaN pattern.
      input = uint32_as_float(std::max<uint32_t>(pattern++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2234 
// Sign-bit-set counterpart of the positive signaling-NaN sweep: patterns from
// [0x7F800000, 0x7FC00000) are clamped to at least 0x7F800001 and then OR-ed
// with 0x80000000 before reaching the scalar conversion-based kernel. Reference
// and kernel outputs are compared with bit 22 (0x00400000) masked out.
TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      // The clamp keeps the infinity encoding out; the OR forces the sign bit on.
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(magnitude++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2252 
// Uses the same inputs as the positive signaling-NaN sweep (patterns from
// [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001) but compares the
// scalar conversion-based kernel output with std::trunc on all 32 bits.
// The DISABLED_ name prefix makes GoogleTest skip this test by default.
TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t pattern = block_start;
    for (float& input : inputs) {
      // The clamp replaces 0x7F800000 (infinity) with the first NaN pattern.
      input = uint32_as_float(std::max<uint32_t>(pattern++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2270 
// Uses the same inputs as the negative signaling-NaN sweep (patterns from
// [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001, sign bit set) but
// compares the scalar conversion-based kernel output with std::trunc on all
// 32 bits. The DISABLED_ name prefix makes GoogleTest skip this test by default.
TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t magnitude = block_start;
    for (float& input : inputs) {
      // The clamp keeps the infinity encoding out; the OR forces the sign bit on.
      input = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(magnitude++, UINT32_C(0x7F800001)));
    }

    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t input_bits = float_as_uint32(inputs[i]);
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
2288