// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>
23
24
// Number of floats processed per call to the kernel under test; also the
// stride used when sweeping ranges of IEEE-754 bit patterns.
constexpr int kBlockSize = 1024;

27 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE_ADDSUB,positive_normal)28 TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
29 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
30 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
31 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
32 for (uint32_t i = 0; i < kBlockSize; i++) {
33 inputs[i] = uint32_as_float(n + i);
34 }
35 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
36 for (uint32_t i = 0; i < kBlockSize; i++) {
37 const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
38 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
39 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
40 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
41 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
42 }
43 }
44 }
45
// Sweep all negative finite bit patterns with magnitude below 2**24
// (0x80000000-0xCB7FFFFF): result must be bit-identical to std::trunc.
TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Positive values at or above 2**24 (0x4B800000-0x7F7FFFFF) are already
// integral: the kernel must pass them through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative values with magnitude at or above 2**24 (0xCB800000-0xFF7FFFFF)
// are already integral: must pass through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// +infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// -infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// Positive quiet NaNs (0x7FC00000-0x7FFFFFFF) must propagate; outputs are
// compared bit-exactly against std::trunc.
TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative quiet NaNs (sign bit ORed onto 0x7FC00000-0x7FFFFFFF) must
// propagate; outputs are compared bit-exactly against std::trunc.
TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Positive signaling NaNs (0x7F800001-0x7FBFFFFF) must produce NaN outputs.
// The quiet bit (0x00400000) is masked out of the comparison because the
// implementation is permitted to quiet the NaN.
TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // std::max clamps the first pattern (0x7F800000 = +inf) up to the smallest sNaN.
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative signaling NaNs must produce NaN outputs; the quiet bit
// (0x00400000) is masked out of the comparison since the implementation
// is permitted to quiet the NaN.
TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // std::max clamps the first pattern (infinity) up to the smallest sNaN.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Stricter variant: positive signaling NaNs must be quieted exactly as
// std::trunc quiets them (bit-exact comparison, quiet bit included).
TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SSE_ADDSUB,negative_snan_to_qnan)214 TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
215 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
216 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
217 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
218 for (uint32_t i = 0; i < kBlockSize; i++) {
219 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
220 }
221 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
222 for (uint32_t i = 0; i < kBlockSize; i++) {
223 const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
224 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
225 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
226 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
227 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
228 }
229 }
230 }
231 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
232
233 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE2_CVT,positive_normal)234 TEST(ROUNDZ__SSE2_CVT, positive_normal) {
235 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
236 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
237 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
238 for (uint32_t i = 0; i < kBlockSize; i++) {
239 inputs[i] = uint32_as_float(n + i);
240 }
241 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
242 for (uint32_t i = 0; i < kBlockSize; i++) {
243 const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
244 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
245 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
246 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
247 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
248 }
249 }
250 }
251
// Sweep all negative finite bit patterns with magnitude below 2**24
// (0x80000000-0xCB7FFFFF): result must be bit-identical to std::trunc.
TEST(ROUNDZ__SSE2_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Positive values at or above 2**24 (0x4B800000-0x7F7FFFFF) are already
// integral: the kernel must pass them through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE2_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative values with magnitude at or above 2**24 (0xCB800000-0xFF7FFFFF)
// are already integral: must pass through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE2_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// +infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// -infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// Positive quiet NaNs (0x7FC00000-0x7FFFFFFF) must propagate; outputs are
// compared bit-exactly against std::trunc.
TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative quiet NaNs (sign bit ORed onto 0x7FC00000-0x7FFFFFFF) must
// propagate; outputs are compared bit-exactly against std::trunc.
TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Positive signaling NaNs (0x7F800001-0x7FBFFFFF) must produce NaN outputs.
// The quiet bit (0x00400000) is masked out of the comparison because the
// implementation is permitted to quiet the NaN.
TEST(ROUNDZ__SSE2_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // std::max clamps the first pattern (0x7F800000 = +inf) up to the smallest sNaN.
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative signaling NaNs must produce NaN outputs; the quiet bit
// (0x00400000) is masked out of the comparison since the implementation
// is permitted to quiet the NaN.
TEST(ROUNDZ__SSE2_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // std::max clamps the first pattern (infinity) up to the smallest sNaN.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Bit-exact sNaN-quieting check. DISABLED_ prefix keeps it from running by
// default — presumably this implementation does not quiet sNaNs exactly as
// std::trunc does (kept for documentation; confirm before enabling).
TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SSE2_CVT,DISABLED_negative_snan_to_qnan)420 TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
421 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
422 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
423 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
424 for (uint32_t i = 0; i < kBlockSize; i++) {
425 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
426 }
427 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
428 for (uint32_t i = 0; i < kBlockSize; i++) {
429 const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
430 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
431 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
432 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
433 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
434 }
435 }
436 }
437 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
438
439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDZ__SSE41,positive_normal)440 TEST(ROUNDZ__SSE41, positive_normal) {
441 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
442 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
443 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
444 for (uint32_t i = 0; i < kBlockSize; i++) {
445 inputs[i] = uint32_as_float(n + i);
446 }
447 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
448 for (uint32_t i = 0; i < kBlockSize; i++) {
449 const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
450 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
451 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
452 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
453 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
454 }
455 }
456 }
457
// Sweep all negative finite bit patterns with magnitude below 2**24
// (0x80000000-0xCB7FFFFF): result must be bit-identical to std::trunc.
TEST(ROUNDZ__SSE41, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Positive values at or above 2**24 (0x4B800000-0x7F7FFFFF) are already
// integral: the kernel must pass them through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE41, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Negative values with magnitude at or above 2**24 (0xCB800000-0xFF7FFFFF)
// are already integral: must pass through bit-exactly (vs std::trunc).
TEST(ROUNDZ__SSE41, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// +infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE41, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// -infinity must round to itself; checking element 0 is sufficient since
// all lanes carry the same value.
TEST(ROUNDZ__SSE41, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// Positive quiet NaNs (0x7FC00000-0x7FFFFFFF) must propagate; outputs are
// compared bit-exactly against std::trunc.
TEST(ROUNDZ__SSE41, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

// Sweeps every negative quiet-NaN encoding (sign bit OR'ed onto the positive
// qNaN range) and compares the kernel bit-exactly against std::trunc.
TEST(ROUNDZ__SSE41, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(UINT32_C(0x80000000) | bits++); });
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
571
// Sweeps positive signaling-NaN encodings (clamped away from +inf at
// 0x7F800000) and checks the kernel against std::trunc, ignoring the quiet
// bit (mask 0xFFBFFFFF) since quieting behavior may legitimately differ.
TEST(ROUNDZ__SSE41, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
589
// Negative-signaling-NaN sweep: sign bit OR'ed onto the clamped positive
// sNaN range; quiet bit is masked out of the comparison (0xFFBFFFFF).
TEST(ROUNDZ__SSE41, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
607
// Sweeps positive signaling NaNs and requires the kernel's output to match
// std::trunc bit-exactly, i.e. the sNaN must be quieted exactly as the
// reference does (no quiet-bit mask here, unlike positive_snan).
TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
625
// Negative counterpart of positive_snan_to_qnan: negative sNaNs must be
// quieted to the exact bit pattern std::trunc produces.
TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
643 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
644
645 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exhaustively sweeps every positive float below 2^24 (bit patterns
// [0x00000000, 0x4B800000)) and compares the kernel against std::trunc.
TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
663
// Exhaustively sweeps every negative float with magnitude below 2^24
// (bit patterns [0x80000000, 0xCB800000)) against std::trunc.
TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
681
// Sweeps positive floats at or above 2^24 (all already integral; bit
// patterns [0x4B800000, 0x7F800000)) — truncation must be the identity.
TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
699
// Sweeps negative floats with magnitude at or above 2^24 (all already
// integral; bit patterns [0xCB800000, 0xFF800000)) against std::trunc.
TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
717
// Truncating +inf must yield +inf bit-exactly. Every lane is loaded with
// +inf and — unlike before, where only outputs[0] was inspected — every
// output element is verified, so lane-specific SIMD bugs cannot slip through.
TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
729
// Truncating -inf must yield -inf bit-exactly. Every lane is loaded with
// -inf and — unlike before, where only outputs[0] was inspected — every
// output element is verified, so lane-specific SIMD bugs cannot slip through.
TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
741
// Sweeps every positive quiet-NaN encoding and checks that the kernel
// reproduces the exact bit pattern std::trunc yields for the same input.
TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
759
// Sweeps every negative quiet-NaN encoding (sign bit OR'ed onto the positive
// qNaN range) and compares the kernel bit-exactly against std::trunc.
TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(UINT32_C(0x80000000) | bits++); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
777
// Sweeps positive signaling-NaN encodings (clamped away from +inf) and
// checks the kernel against std::trunc with the quiet bit masked out
// (0xFFBFFFFF), since quieting behavior may legitimately differ.
TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
795
// Negative-signaling-NaN sweep: sign bit OR'ed onto the clamped positive
// sNaN range; quiet bit is masked out of the comparison (0xFFBFFFFF).
TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
813
// Sweeps positive signaling NaNs and requires bit-exact agreement with
// std::trunc, i.e. the sNaN must be quieted exactly as the reference does.
TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
831
// Negative counterpart of positive_snan_to_qnan: negative sNaNs must be
// quieted to the exact bit pattern std::trunc produces.
TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
849 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
850
851 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exhaustively sweeps every positive float below 2^24 (bit patterns
// [0x00000000, 0x4B800000)) and compares the kernel against std::trunc.
TEST(ROUNDZ__NEON_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
869
// Exhaustively sweeps every negative float with magnitude below 2^24
// (bit patterns [0x80000000, 0xCB800000)) against std::trunc.
TEST(ROUNDZ__NEON_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
887
// Sweeps positive floats at or above 2^24 (all already integral; bit
// patterns [0x4B800000, 0x7F800000)) — truncation must be the identity.
TEST(ROUNDZ__NEON_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
905
// Sweeps negative floats with magnitude at or above 2^24 (all already
// integral; bit patterns [0xCB800000, 0xFF800000)) against std::trunc.
TEST(ROUNDZ__NEON_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
923
// Truncating +inf must yield +inf bit-exactly. Every lane is loaded with
// +inf and — unlike before, where only outputs[0] was inspected — every
// output element is verified, so lane-specific SIMD bugs cannot slip through.
TEST(ROUNDZ__NEON_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
935
// Truncating -inf must yield -inf bit-exactly. Every lane is loaded with
// -inf and — unlike before, where only outputs[0] was inspected — every
// output element is verified, so lane-specific SIMD bugs cannot slip through.
TEST(ROUNDZ__NEON_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  // All inputs are identical, so one reference value covers the whole block.
  const uint32_t reference_output = float_as_uint32(std::trunc(inputs[0]));
  for (uint32_t i = 0; i < kBlockSize; i++) {
    ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
  }
}
947
// Sweeps every positive quiet-NaN encoding and checks that the kernel
// reproduces the exact bit pattern std::trunc yields for the same input.
TEST(ROUNDZ__NEON_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
965
// Sweeps every negative quiet-NaN encoding (sign bit OR'ed onto the positive
// qNaN range) and compares the kernel bit-exactly against std::trunc.
TEST(ROUNDZ__NEON_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(UINT32_C(0x80000000) | bits++); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
983
// Sweeps positive signaling-NaN encodings (clamped away from +inf) and
// checks the kernel against std::trunc with the quiet bit masked out
// (0xFFBFFFFF), since quieting behavior may legitimately differ.
TEST(ROUNDZ__NEON_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1001
// Negative-signaling-NaN sweep: sign bit OR'ed onto the clamped positive
// sNaN range; quiet bit is masked out of the comparison (0xFFBFFFFF).
TEST(ROUNDZ__NEON_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1019
// Registered but disabled (DISABLED_ prefix): would require the cvt variant
// to quiet positive sNaNs to the exact bit pattern std::trunc produces.
// NOTE(review): presumably disabled because this kernel does not guarantee
// sNaN quieting — confirm before enabling.
TEST(ROUNDZ__NEON_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1037
// Registered but disabled (DISABLED_ prefix): would require the cvt variant
// to quiet negative sNaNs to the exact bit pattern std::trunc produces.
// NOTE(review): presumably disabled because this kernel does not guarantee
// sNaN quieting — confirm before enabling.
TEST(ROUNDZ__NEON_CVT, DISABLED_negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1055 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1056
1057 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exhaustively sweeps every positive float below 2^24 (bit patterns
// [0x00000000, 0x4B800000)) and compares the kernel against std::trunc.
TEST(ROUNDZ__NEONV8, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1075
// Exhaustively sweeps every negative float with magnitude below 2^24
// (bit patterns [0x80000000, 0xCB800000)) against std::trunc.
TEST(ROUNDZ__NEONV8, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    uint32_t bits = block;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1093
TEST(ROUNDZ__NEONV8, positive_integral) {
  // Sweeps positive bit patterns in [0x4B800000, 0x7F800000) — values large enough
  // to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1111
TEST(ROUNDZ__NEONV8, negative_integral) {
  // Sweeps negative bit patterns in [0xCB800000, 0xFF800000) — magnitudes large
  // enough to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1129
TEST(ROUNDZ__NEONV8, positive_infinity) {
  // +infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1141
TEST(ROUNDZ__NEONV8, negative_infinity) {
  // -infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1153
TEST(ROUNDZ__NEONV8, positive_qnan) {
  // Sweeps positive quiet-NaN bit patterns [0x7FC00000, 0x80000000) and checks the
  // kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1171
TEST(ROUNDZ__NEONV8, negative_qnan) {
  // Sweeps negative quiet-NaN bit patterns (sign bit ORed onto [0x7FC00000, 0x80000000))
  // and checks the kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | bits++);
    });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1189
TEST(ROUNDZ__NEONV8, positive_snan) {
  // Sweeps positive signaling-NaN bit patterns [0x7F800001, 0x7FC00000); the quiet bit
  // (0x00400000) is masked out on both sides because std::trunc quiets signaling NaNs.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1207
TEST(ROUNDZ__NEONV8, negative_snan) {
  // Sweeps negative signaling-NaN bit patterns (sign bit ORed onto [0x7F800001, 0x7FC00000));
  // the quiet bit (0x00400000) is masked out on both sides of the comparison.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1225
TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
  // Sweeps positive signaling-NaN inputs and requires an exact bitwise match with
  // std::trunc, i.e. the kernel must quiet signaling NaNs the same way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1243
TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
  // Sweeps negative signaling-NaN inputs and requires an exact bitwise match with
  // std::trunc, i.e. the kernel must quiet signaling NaNs the same way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1261 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1262
1263 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_normal) {
  // Sweeps every positive bit pattern below 0x4B800000 and checks the kernel's
  // round-toward-zero result bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1281
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_normal) {
  // Sweeps every negative bit pattern in [0x80000000, 0xCB800000) and checks the
  // kernel's round-toward-zero result bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1299
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_integral) {
  // Sweeps positive bit patterns in [0x4B800000, 0x7F800000) — values large enough
  // to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1317
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_integral) {
  // Sweeps negative bit patterns in [0xCB800000, 0xFF800000) — magnitudes large
  // enough to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1335
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_infinity) {
  // +infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1347
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_infinity) {
  // -infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1359
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_qnan) {
  // Sweeps positive quiet-NaN bit patterns [0x7FC00000, 0x80000000) and checks the
  // kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1377
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_qnan) {
  // Sweeps negative quiet-NaN bit patterns (sign bit ORed onto [0x7FC00000, 0x80000000))
  // and checks the kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | bits++);
    });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1395
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan) {
  // Sweeps positive signaling-NaN bit patterns [0x7F800001, 0x7FC00000); the quiet bit
  // (0x00400000) is masked out on both sides because std::trunc quiets signaling NaNs.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1413
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan) {
  // Sweeps negative signaling-NaN bit patterns (sign bit ORed onto [0x7F800001, 0x7FC00000));
  // the quiet bit (0x00400000) is masked out on both sides of the comparison.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1431
TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  // Sweeps positive signaling-NaN inputs and requires an exact bitwise match with
  // std::trunc, i.e. the kernel must quiet signaling NaNs the same way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1449
TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  // Sweeps negative signaling-NaN inputs and requires an exact bitwise match with
  // std::trunc, i.e. the kernel must quiet signaling NaNs the same way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1467 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1468
1469 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDZ__WASMSIMD_CVT, positive_normal) {
  // Sweeps every positive bit pattern below 0x4B800000 and checks the kernel's
  // round-toward-zero result bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1487
TEST(ROUNDZ__WASMSIMD_CVT, negative_normal) {
  // Sweeps every negative bit pattern in [0x80000000, 0xCB800000) and checks the
  // kernel's round-toward-zero result bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1505
TEST(ROUNDZ__WASMSIMD_CVT, positive_integral) {
  // Sweeps positive bit patterns in [0x4B800000, 0x7F800000) — values large enough
  // to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1523
TEST(ROUNDZ__WASMSIMD_CVT, negative_integral) {
  // Sweeps negative bit patterns in [0xCB800000, 0xFF800000) — magnitudes large
  // enough to be exactly integral — and checks the kernel bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1541
TEST(ROUNDZ__WASMSIMD_CVT, positive_infinity) {
  // +infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1553
TEST(ROUNDZ__WASMSIMD_CVT, negative_infinity) {
  // -infinity must pass through round-toward-zero unchanged (verified bitwise vs std::trunc).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  inputs.assign(kBlockSize, -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1565
TEST(ROUNDZ__WASMSIMD_CVT, positive_qnan) {
  // Sweeps positive quiet-NaN bit patterns [0x7FC00000, 0x80000000) and checks the
  // kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(),
                  [bits = block]() mutable { return uint32_as_float(bits++); });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1583
TEST(ROUNDZ__WASMSIMD_CVT, negative_qnan) {
  // Sweeps negative quiet-NaN bit patterns (sign bit ORed onto [0x7FC00000, 0x80000000))
  // and checks the kernel's output bitwise against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(UINT32_C(0x80000000) | bits++);
    });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1601
TEST(ROUNDZ__WASMSIMD_CVT, positive_snan) {
  // Sweeps positive signaling-NaN bit patterns [0x7F800001, 0x7FC00000); the quiet bit
  // (0x00400000) is masked out on both sides because std::trunc quiets signaling NaNs.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    std::generate(inputs.begin(), inputs.end(), [bits = block]() mutable {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1619
TEST(ROUNDZ__WASMSIMD_CVT, negative_snan) {
  // Negative signaling NaNs: compare ignoring the quiet bit (0x00400000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1637
TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_positive_snan_to_qnan) {
  // Strict check that positive sNaN inputs are quieted exactly like std::trunc;
  // disabled: this kernel variant does not guarantee sNaN -> qNaN conversion.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1655
TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_negative_snan_to_qnan) {
  // Strict check that negative sNaN inputs are quieted exactly like std::trunc;
  // disabled: this kernel variant does not guarantee sNaN -> qNaN conversion.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1673 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1674
1675 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_normal) {
  // Sweep all positive values below 2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1693
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_normal) {
  // Sweep all negative values above -2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1711
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_integral) {
  // Values >= 2**24 are already integral; truncation must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1729
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_integral) {
  // Values <= -2**24 are already integral; truncation must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1747
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_infinity) {
  // trunc(+inf) == +inf; the kernel runs on a full block of identical lanes.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1759
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_infinity) {
  // trunc(-inf) == -inf; the kernel runs on a full block of identical lanes.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1771
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_qnan) {
  // Round-toward-zero on positive quiet NaNs must match std::trunc bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1789
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_qnan) {
  // Round-toward-zero on negative quiet NaNs must match std::trunc bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | (block + offset));
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1807
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan) {
  // Positive signaling NaNs: compare ignoring the quiet bit (0x00400000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1825
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan) {
  // Negative signaling NaNs: compare ignoring the quiet bit (0x00400000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1843
TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan_to_qnan) {
  // Positive sNaN inputs must be quieted exactly like std::trunc (bit-exact).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1861
TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan_to_qnan) {
  // Negative sNaN inputs must be quieted exactly like std::trunc (bit-exact).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1879 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1880
TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
  // Sweep all positive values below 2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1898
TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
  // Sweep all negative values above -2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1916
TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
  // Values >= 2**24 are already integral; truncation must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1934
TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
  // Values <= -2**24 are already integral; truncation must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1952
TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
  // trunc(+inf) == +inf; the kernel runs on a full block of identical lanes.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1964
TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
  // trunc(-inf) == -inf; the kernel runs on a full block of identical lanes.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1976
TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
  // Round-toward-zero on positive quiet NaNs must match std::trunc bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1994
TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
  // Round-toward-zero on negative quiet NaNs must match std::trunc bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | (block + offset));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2012
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
  // Positive signaling NaNs: compare ignoring the quiet bit (0x00400000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2030
TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
  // Negative signaling NaNs: compare ignoring the quiet bit (0x00400000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2048
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
  // Positive sNaN inputs must be quieted exactly like std::trunc (bit-exact).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2066
TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
  // Negative sNaN inputs must be quieted exactly like std::trunc (bit-exact).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + offset, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2084
TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
  // Sweep all positive values below 2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2102
TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
  // Sweep all negative values above -2**24 (non-integral range) against std::trunc.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2120
TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
  // Values >= 2**24 are already integral; truncation must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      inputs[offset] = uint32_as_float(block + offset);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::trunc(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2138
TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
  // Negative floats of magnitude >= 2**24 (up to -infinity, exclusive) are
  // already integral; truncation must return them unchanged.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2156
TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
  // +infinity must pass through truncation unchanged.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float pos_inf = +std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), pos_inf);
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_bits, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
2168
TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
  // -infinity must pass through truncation unchanged.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float neg_inf = -std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), neg_inf);
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[0]));
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  ASSERT_EQ(reference_bits, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
2180
TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
  // Sweep all positive quiet NaN encodings; the kernel's NaN output must
  // match std::trunc bit-for-bit (quiet NaNs propagate unchanged).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2198
TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
  // Same sweep as positive_qnan, but with the sign bit set on every input.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2216
TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
  // Sweep positive signaling NaN encodings. The comparison masks out bit 22
  // (the quiet bit), so the kernel is allowed to quiet the NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x7F800000 itself (that encoding is +infinity, not a NaN).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2234
TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
  // Same sweep as positive_snan, with the sign bit set; bit 22 (the quiet
  // bit) is masked out of the comparison, so quieting the NaN is allowed.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x7F800000 itself (that encoding is infinity, not a NaN).
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2252
TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
  // Stricter (currently disabled) variant of positive_snan: the comparison
  // is exact, so a signaling NaN is required to come out quieted exactly as
  // std::trunc quiets it.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x7F800000 itself (that encoding is +infinity, not a NaN).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2270
TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
  // Stricter (currently disabled) variant of negative_snan: the comparison
  // is exact, so a signaling NaN is required to come out quieted exactly as
  // std::trunc quiets it.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x7F800000 itself (that encoding is infinity, not a NaN).
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t reference_bits = float_as_uint32(std::trunc(inputs[idx]));
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      ASSERT_EQ(reference_bits, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
2288