/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <map>
#include <typeindex>
#include <variant>
#include <vector>

#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>

#include <gtest/gtest.h>

using namespace ::testing;
using exec_aten::MemoryFormat;
using exec_aten::optional;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::testing::TensorFactory;
// To further verify the accuracy of our op_to, we test conversions from
// floating-point types to signed int types against test cases generated
// directly by core PyTorch. That data is randomly generated in [-5, 5].
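//
// (A rough sketch of how such data could be regenerated; this is an
// assumption about the original generation script, which is not part of
// this file:
//   torch.empty(15, dtype=torch.float64).uniform_(-5, 5)
// )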

// clang-format off
typedef std::map<
    std::type_index,
    std::variant<
        std::vector<float>,
        std::vector<double>,
        std::vector<exec_aten::Half>,
        std::vector<exec_aten::BFloat16>>>
    FloatingTypeToDataMap;

typedef std::map<
    std::type_index,
    std::variant<
        std::vector<int64_t>,
        std::vector<int32_t>,
        std::vector<int16_t>,
        std::vector<int8_t>,
        std::vector<uint8_t>>>
    IntTypeToDataMap;
// clang-format on
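
// A minimal usage sketch for these maps (illustrative only; not used by the
// tests directly): entries are keyed by std::type_index, and the vector is
// read back out of the variant with std::get.
//
//   FloatingTypeToDataMap data;
//   data[typeid(float)] = std::vector<float>{1.0f, 2.0f};
//   const auto& floats = std::get<std::vector<float>>(data[typeid(float)]);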

class OpToTest : public OperatorTest {
 protected:
  Tensor& op_to_copy_out(
      const Tensor& self,
      bool non_blocking,
      optional<MemoryFormat> memory_format,
      Tensor& out) {
    return torch::executor::aten::_to_copy_outf(
        context_, self, non_blocking, memory_format, out);
  }

  // Cast a vector of INPUT_CTYPE to a vector of OUTPUT_CTYPE.
  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  std::vector<OUTPUT_CTYPE> vector_type_cast(std::vector<INPUT_CTYPE> input) {
    std::vector<OUTPUT_CTYPE> output(input.size());
    std::transform(
        input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) {
          return static_cast<OUTPUT_CTYPE>(x);
        });
    return output;
  }

  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  struct ToTestCase {
    const std::vector<int32_t> sizes;
    const std::vector<INPUT_CTYPE> data_in;
    const std::vector<OUTPUT_CTYPE> data_out;
  };

  // Each test uses a different combination of input and output types, so
  // creating templated test cases with custom data types for both the input
  // and the output data would get messy. Instead, all test-case data is
  // stored as double and cast to the desired type when handed to the tf.make
  // function. In our experiments, core PyTorch's type cast behaves the same
  // as C++ static_cast within the representable range, so we believe using
  // static_cast to generate the ground truth is reasonable.
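  //
  // For example, both C++ static_cast and PyTorch's .to() truncate toward
  // zero when narrowing a representable floating-point value to an integer
  // type (illustrative values, not taken from the test data below):
  //   static_cast<int32_t>(3.9)  == 3
  //   static_cast<int32_t>(-3.9) == -3
  //   torch.tensor([3.9, -3.9]).to(torch.int32)  ->  tensor([3, -3])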
  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_static_cast(
      std::vector<ToTestCase<double, double>> test_cases) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    for (auto test_case : test_cases) {
      auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case.data_in);
      auto data_out = vector_type_cast<INPUT_CTYPE, OUTPUT_CTYPE>(data_in);

      Tensor input = tf_in.make(test_case.sizes, data_in);
      Tensor output = tf_out.zeros_like(input);

      Tensor ret = op_to_copy_out(
          /*self=*/input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          output);

      Tensor expected = tf_out.make(test_case.sizes, data_out);

      // The return value of op_to_copy_out should be the same tensor as out,
      // and both should match the expected values.
      EXPECT_TENSOR_EQ(ret, output);
      EXPECT_TENSOR_EQ(ret, expected);
    }
  }

  template <typename INPUT_CTYPE, ScalarType INPUT_DTYPE>
  void test_runner_to_bool(
      std::vector<double> test_case,
      std::vector<uint8_t> data_out) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<ScalarType::Bool> tf_out;

    auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case);

    Tensor input = tf_in.make({(int)test_case.size()}, data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to output
    // should be the same, and both should match the reference values in
    // expected.
    EXPECT_TENSOR_EQ(ret, output);
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <typename OUT_CTYPE, ScalarType OUT_DTYPE>
  void test_runner_from_bool(
      std::vector<uint8_t> test_case,
      std::vector<double> out) {
    TensorFactory<ScalarType::Bool> tf_in;
    TensorFactory<OUT_DTYPE> tf_out;

    auto data_out = vector_type_cast<double, OUT_CTYPE>(out);

    Tensor input = tf_in.make({(int)test_case.size()}, test_case);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to output
    // should be the same, and both should match the reference values in
    // expected.
    EXPECT_TENSOR_EQ(ret, output);
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_hardcode_data(
      FloatingTypeToDataMap floating_point_data,
      IntTypeToDataMap int_data) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) {
      // The negative input values would underflow when cast to uint8_t, so
      // skip this case.
      return;
    }

    ToTestCase<INPUT_CTYPE, OUTPUT_CTYPE> test_case = {
        /*sizes=*/{3, 5}, /*data_in=*/
        std::get<std::vector<INPUT_CTYPE>>(
            floating_point_data[typeid(INPUT_CTYPE)]),
        /*data_out=*/
        std::get<std::vector<OUTPUT_CTYPE>>(int_data[typeid(OUTPUT_CTYPE)])};

    Tensor input = tf_in.make(test_case.sizes, test_case.data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make(test_case.sizes, test_case.data_out);

    // The return value of op_to_copy_out should be the same tensor as out,
    // and both should match the expected values.
    EXPECT_TENSOR_EQ(ret, output);
    EXPECT_TENSOR_EQ(ret, expected);
  }

  /* %python
  import torch
  torch.manual_seed(0)
  x = torch.rand(2, 3)
  res = x.to(non_blocking = False, memory_format = torch.preserve_format)
  op = "op_to_copy_out"
  opt_setup_params = """
    bool non_blocking = false;
    optional<MemoryFormat> memory_format;
  """
  opt_extra_params = "non_blocking, memory_format,"
  out_args = "out_shape, dynamism"
  dtype = "ScalarType::Float"
  check = "EXPECT_TENSOR_EQ" */

  void test_dynamic_shape(
      const std::vector<int32_t>& out_shape,
      enum torch::executor::TensorShapeDynamism dynamism) {
    /* %python
    %rewrite(unary_op) */

    TensorFactory<ScalarType::Float> tf;

    Tensor x = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});
    Tensor expected = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});

    bool non_blocking = false;
    optional<MemoryFormat> memory_format;

    Tensor out = tf.zeros(out_shape, dynamism);
    op_to_copy_out(x, non_blocking, memory_format, out);
    EXPECT_TENSOR_EQ(out, expected);
  }
};

/* Here we deliberately do not implement or test the behavior of casting a
 * value that cannot be represented in the target type (e.g. inf to int32_t,
 * nan to int64_t, or 2147483648 to int32_t), because:
 * - a. The result of such a cast is undefined according to the C++ standard;
 * - b. Core PyTorch documents no explicit rule for such conversions (they do
 *      not match static_cast or any other C++ casting function);
 * - c. Users who cast an unrepresentable value to a given type do so at
 *      their own risk;
 * - d. Even though we could cover these boundary cases with if/switch
 *      statements, the code would be lengthy and jumbled. Writing such
 *      convoluted code to pin down undefined behavior is not worthwhile,
 *      and we could never cover every such case anyway.
 */
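
// (For instance, static_cast<int32_t>(std::numeric_limits<float>::infinity())
// is undefined behavior under the C++ standard's floating-to-integral
// conversion rules, and core PyTorch likewise promises no particular result
// for the equivalent .to(torch.int32) conversion.)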
// Regular test for to_copy.out.
// Tests that to_copy.out works correctly for all supported pairs of input
// and output dtypes.
TEST_F(OpToTest, AllDtypesSupported) {
  std::vector<ToTestCase<double, double>> test_cases = {
      {
          /*sizes=*/{2, 4}, /*data_in=*/
          {2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3}, /*data_out=*/
          {}, // data_out shouldn't be used in test_runner_static_cast
      },
      {
          /*sizes=*/{3, 4, 0, 5},
          /*data_in=*/{},
          /*data_out=*/{},
      },
      {
          /*sizes=*/{},
          /*data_in=*/{10.0},
          /*data_out=*/{}, // data_out shouldn't be used in
                           // test_runner_static_cast
      },
  };

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_REALHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
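
  // The nested ET_FORALL_* macros instantiate test_runner_static_cast once
  // per (input dtype, output dtype) pair; e.g. one of the generated calls is
  // effectively (illustrative expansion):
  //   test_runner_static_cast<float, ScalarType::Float,
  //                           int32_t, ScalarType::Int>(test_cases);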

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, BoolTests) {
  std::vector<double> test_case_to_bool = {1.1, 2.2, 0};
  std::vector<uint8_t> result_to_bool = {true, true, false};
#define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE)               \
  test_runner_to_bool<INPUT_CTYPE, ScalarType::INPUT_DTYPE>( \
      test_case_to_bool, result_to_bool);
  ET_FORALL_REALHBF16_TYPES(TEST_TO_BOOL);
#undef TEST_TO_BOOL
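
  // (Both C++ and PyTorch convert to bool with "nonzero means true"; e.g.
  //  static_cast<bool>(1.1) == true and static_cast<bool>(0.0) == false,
  //  which is what the vectors above encode.)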

  std::vector<uint8_t> test_case_from_bool = {true, true, false};
  std::vector<double> result_from_bool = {1.0, 1.0, 0};
#define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE)               \
  test_runner_from_bool<OUTPUT_CTYPE, ScalarType::OUTPUT_DTYPE>( \
      test_case_from_bool, result_from_bool);
  ET_FORALL_REALHBF16_TYPES(TEST_FROM_BOOL);
#undef TEST_FROM_BOOL
}

TEST_F(OpToTest, NanInfSupported) {
  constexpr auto floatInfinity = std::numeric_limits<float>::infinity();
  std::vector<ToTestCase<double, double>> test_cases = {{
      /*sizes=*/{2, 4},
      /*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
      /*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
  }};

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_FLOATHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, HardcodeFloatConvertInt) {
  // Hardcoded inputs and outputs generated by core PyTorch.
  // clang-format off
  std::vector<float> float_data = {
      -1.47900056838989257812, -4.59277725219726562500,
      2.15365791320800781250,  -2.55494546890258789062,
      3.06999135017395019531,  3.27460670471191406250,
      -3.98865103721618652344, -4.81065988540649414062,
      3.67902207374572753906,  3.72226405143737792969,
      0.80567771196365356445,  2.23788332939147949219,
      -0.52035576105117797852, -1.58493483066558837891,
      -0.30919688940048217773};

  std::vector<double> double_data = {
      -1.47900053955270172068, -4.59277735274143061872,
      2.15365796963871947156,  -2.55494554556038755422,
      3.06999137834642255029,  3.27460679459944969949,
      -3.98865109243288795682, -4.81065977167646074975,
      3.67902198302105531980,  3.72226414774102742911,
      0.80567768667100203572,  2.23788335717029518435,
      -0.52035578832931150828, -1.58493480710766210251,
      -0.30919688936285893988};
  // clang-format on

  std::vector<exec_aten::Half> half_data;
  std::vector<exec_aten::BFloat16> bf16_data;
  for (auto d : double_data) {
    half_data.emplace_back(d);
    bf16_data.emplace_back(d);
  }

  std::vector<int64_t> int64_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int32_t> int32_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int16_t> int16_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int8_t> int8_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};

  // Gather all floating-point data together for easier traversal.
  FloatingTypeToDataMap floating_point_data;
  floating_point_data[typeid(float)] = float_data;
  floating_point_data[typeid(double)] = double_data;
  floating_point_data[typeid(exec_aten::Half)] = half_data;
  floating_point_data[typeid(exec_aten::BFloat16)] = bf16_data;

  // Gather all int data together for easier traversal.
  IntTypeToDataMap int_data;
  int_data[typeid(int64_t)] = int64_data;
  int_data[typeid(int32_t)] = int32_data;
  int_data[typeid(int16_t)] = int16_data;
  int_data[typeid(int8_t)] = int8_data;

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_hardcode_data<                                              \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, MismatchedSizesDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle mismatched sizes";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros({3, 2, 1, 1});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

// Only contiguous memory is supported; any memory format other than
// MemoryFormat::Contiguous should be rejected. The kernel is expected to
// fail when given an illegal memory format.
TEST_F(OpToTest, MismatchedMemoryFormatDies) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non contiguous memory formats";
  }
  TensorFactory<ScalarType::Float> tf_in;
  TensorFactory<ScalarType::Float> tf_out;
  Tensor input =
      tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf_out.zeros({3, 1, 1, 2});

  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          static_cast<exec_aten::MemoryFormat>(55),
          out));
  // memory format can be null
  EXPECT_TENSOR_EQ(
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          /*memory_format=*/exec_aten::nullopt,
          out),
      input);
}

// Only blocking data transfer is supported.
TEST_F(OpToTest, MismatchedBlockingDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non blocking data transfer";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/true,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

TEST_F(OpToTest, DynamicShapeUpperBoundSameAsExpected) {
  test_dynamic_shape(
      {2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}

TEST_F(OpToTest, DynamicShapeUpperBoundLargerThanExpected) {
  test_dynamic_shape(
      {10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}

TEST_F(OpToTest, DynamicShapeUnbound) {
  if (!torch::executor::testing::SupportedFeatures::get()->output_resize) {
    GTEST_SKIP() << "Dynamic shape unbound not supported";
  }
  test_dynamic_shape(
      {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND);
}