/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <algorithm>
#include <cstdint>
#include <limits>
#include <map>
#include <typeindex>
#include <variant>
#include <vector>

#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>

#include <gtest/gtest.h>

using namespace ::testing;
using exec_aten::MemoryFormat;
using exec_aten::optional;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::testing::TensorFactory;

// To further verify the accuracy of our op_to, we test the conversion from
// floating-point types to signed integer types using test cases generated
// directly by core PyTorch. That data is randomly generated in [-5, 5].

// clang-format off
typedef std::map<
          std::type_index,
          std::variant<
            std::vector<float>,
            std::vector<double>,
            std::vector<exec_aten::Half>,
            std::vector<exec_aten::BFloat16>>>
        FloatingTypeToDataMap;

typedef std::map<
          std::type_index,
          std::variant<
              std::vector<int64_t>,
              std::vector<int32_t>,
              std::vector<int16_t>,
              std::vector<int8_t>,
              std::vector<uint8_t>>>
        IntTypeToDataMap;
// clang-format on
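//
// A minimal illustrative sketch (not part of the tests) of how these maps are
// used below: data vectors are keyed by the std::type_index of their element
// type and fetched back with std::get on the variant:
//
//   FloatingTypeToDataMap m;
//   m[typeid(float)] = std::vector<float>{1.0f, 2.0f};
//   auto& v = std::get<std::vector<float>>(m[typeid(float)]);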

class OpToTest : public OperatorTest {
 protected:
  Tensor& op_to_copy_out(
      const Tensor& self,
      bool non_blocking,
      optional<MemoryFormat> memory_format,
      Tensor& out) {
    return torch::executor::aten::_to_copy_outf(
        context_, self, non_blocking, memory_format, out);
  }

  // Cast a vector of INPUT_CTYPE elements to a vector of OUTPUT_CTYPE.
  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  std::vector<OUTPUT_CTYPE> vector_type_cast(std::vector<INPUT_CTYPE> input) {
    std::vector<OUTPUT_CTYPE> output(input.size());
    std::transform(
        input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) {
          return static_cast<OUTPUT_CTYPE>(x);
        });
    return output;
  }
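
  // For example, vector_type_cast<double, int32_t>({1.9, -2.9}) yields
  // {1, -2}: static_cast from a floating-point type to an integer type
  // truncates toward zero for values representable in the target type.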
  template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
  struct ToTestCase {
    const std::vector<int32_t> sizes;
    const std::vector<INPUT_CTYPE> data_in;
    const std::vector<OUTPUT_CTYPE> data_out;
  };

  // Each test uses a different combination of input and output types, so
  // creating templated test cases with custom data types for both the input
  // and the output data would get messy. Instead, all test case data is stored
  // as double and cast to the desired type when handed to the tf.make
  // function. In our experiments, core PyTorch's type cast matches C++
  // static_cast within the representable range, so using static_cast to
  // generate the ground truth is reasonable.
  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_static_cast(
      std::vector<ToTestCase<double, double>> test_cases) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    for (auto test_case : test_cases) {
      auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case.data_in);
      auto data_out = vector_type_cast<INPUT_CTYPE, OUTPUT_CTYPE>(data_in);

      Tensor input = tf_in.make(test_case.sizes, data_in);
      Tensor output = tf_out.zeros_like(input);

      Tensor ret = op_to_copy_out(
          /*self=*/input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          output);

      Tensor expected = tf_out.make(test_case.sizes, data_out);

      // The return value of op_to_copy_out and the values written to the out
      // tensor should match, and both should match the expected tensor.
      EXPECT_TENSOR_EQ(ret, output);
      EXPECT_TENSOR_EQ(ret, expected);
    }
  }

  template <typename INPUT_CTYPE, ScalarType INPUT_DTYPE>
  void test_runner_to_bool(
      std::vector<double> test_case,
      std::vector<uint8_t> data_out) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<ScalarType::Bool> tf_out;

    auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case);

    Tensor input = tf_in.make({(int)test_case.size()}, data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to the out
    // tensor should match.
    EXPECT_TENSOR_EQ(ret, output);
    // Both should also match the reference values in expected.
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <typename OUT_CTYPE, ScalarType OUT_DTYPE>
  void test_runner_from_bool(
      std::vector<uint8_t> test_case,
      std::vector<double> out) {
    TensorFactory<ScalarType::Bool> tf_in;
    TensorFactory<OUT_DTYPE> tf_out;

    auto data_out = vector_type_cast<double, OUT_CTYPE>(out);

    Tensor input = tf_in.make({(int)test_case.size()}, test_case);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make({(int)data_out.size()}, data_out);

    // The return value of op_to_copy_out and the values written to the out
    // tensor should match.
    EXPECT_TENSOR_EQ(ret, output);
    // Both should also match the reference values in expected.
    EXPECT_TENSOR_EQ(ret, expected);
  }

  template <
      typename INPUT_CTYPE,
      ScalarType INPUT_DTYPE,
      typename OUTPUT_CTYPE,
      ScalarType OUTPUT_DTYPE>
  void test_runner_hardcode_data(
      FloatingTypeToDataMap floating_point_data,
      IntTypeToDataMap int_data) {
    TensorFactory<INPUT_DTYPE> tf_in;
    TensorFactory<OUTPUT_DTYPE> tf_out;

    if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) {
      // The negative inputs would underflow when cast to uint8_t, so skip it.
      return;
    }

    ToTestCase<INPUT_CTYPE, OUTPUT_CTYPE> test_case = {
        /*sizes=*/{3, 5}, /*data_in=*/
        std::get<std::vector<INPUT_CTYPE>>(
            floating_point_data[typeid(INPUT_CTYPE)]),
        /*data_out=*/
        std::get<std::vector<OUTPUT_CTYPE>>(int_data[typeid(OUTPUT_CTYPE)])};

    Tensor input = tf_in.make(test_case.sizes, test_case.data_in);
    Tensor output = tf_out.zeros_like(input);

    Tensor ret = op_to_copy_out(
        /*self=*/input,
        /*non_blocking=*/false,
        exec_aten::MemoryFormat::Contiguous,
        output);

    Tensor expected = tf_out.make(test_case.sizes, test_case.data_out);

    // The return value of op_to_copy_out and the values written to the out
    // tensor should match, and both should match the expected tensor.
    EXPECT_TENSOR_EQ(ret, output);
    EXPECT_TENSOR_EQ(ret, expected);
  }

  /* %python
  import torch
  torch.manual_seed(0)
  x = torch.rand(2, 3)
  res = x.to(non_blocking = False, memory_format = torch.preserve_format)
  op = "op_to_copy_out"
  opt_setup_params = """
    bool non_blocking = false;
    optional<MemoryFormat> memory_format;
  """
  opt_extra_params = "non_blocking, memory_format,"
  out_args = "out_shape, dynamism"
  dtype = "ScalarType::Float"
  check = "EXPECT_TENSOR_EQ" */

  void test_dynamic_shape(
      const std::vector<int32_t>& out_shape,
      enum torch::executor::TensorShapeDynamism dynamism) {
    /* %python
    %rewrite(unary_op) */

    TensorFactory<ScalarType::Float> tf;

    Tensor x = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});
    Tensor expected = tf.make(
        {2, 3},
        {0.49625658988952637,
         0.7682217955589294,
         0.08847743272781372,
         0.13203048706054688,
         0.30742281675338745,
         0.6340786814689636});

    bool non_blocking = false;
    optional<MemoryFormat> memory_format;

    Tensor out = tf.zeros(out_shape, dynamism);
    op_to_copy_out(x, non_blocking, memory_format, out);
    EXPECT_TENSOR_EQ(out, expected);
  }
};

/* We deliberately do not implement or test the behavior of casting a value
 * that cannot be represented in the destination type (e.g. inf to int32_t,
 * nan to int64_t, or 2147483648 to int32_t), because
 * - a. The result of such a cast is undefined according to the C++ standard;
 * - b. Core PyTorch has no explicit rules for such a conversion (it does not
 *      match static_cast or any other casting function in C++);
 * - c. Users who cast an unrepresentable value to a given type take that risk
 *      themselves;
 * - d. Even though we could always use if/switch statements to cover these
 *      boundary cases, the code would be lengthy and jumbled. Writing such
 *      disordered code to pin down undefined behavior is meaningless, and we
 *      could never cover all such cases anyway.
 */
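
// An illustrative instance of point (a) above: static_cast<int32_t>(3e9) asks
// for a value outside int32_t's range, which the C++ standard leaves undefined
// ([conv.fpint]), so no portable expected value exists to test against.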

namespace {} // namespace

// Regular test for to_copy.out.
// Verify that to_copy.out works for all combinations of input and output
// dtypes.
TEST_F(OpToTest, AllDtypesSupported) {
  std::vector<ToTestCase<double, double>> test_cases = {
      {
          /*sizes=*/{2, 4}, /*data_in=*/
          {2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3}, /*data_out=*/
          {}, // data_out shouldn't be used in test_runner_static_cast
      },
      {
          /*sizes=*/{3, 4, 0, 5},
          /*data_in=*/{},
          /*data_out=*/{},
      },
      {
          /*sizes=*/{},
          /*data_in=*/{10.0},
          /*data_out=*/{}, // data_out shouldn't be used in
                           // test_runner_static_cast
      },
  };

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_REALHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
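
  // The nested macros above instantiate test_runner_static_cast for every
  // (input, output) dtype pair; for instance, one expansion is effectively
  // (a sketch of the generated code, not literal macro output):
  //   test_runner_static_cast<float, ScalarType::Float,
  //                           int32_t, ScalarType::Int>(test_cases);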

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, BoolTests) {
  std::vector<double> test_case_to_bool = {1.1, 2.2, 0};
  std::vector<uint8_t> result_to_bool = {true, true, false};
#define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE)               \
  test_runner_to_bool<INPUT_CTYPE, ScalarType::INPUT_DTYPE>( \
      test_case_to_bool, result_to_bool);
  ET_FORALL_REALHBF16_TYPES(TEST_TO_BOOL);

  std::vector<uint8_t> test_case_from_bool = {true, true, false};
  std::vector<double> result_from_bool = {1.0, 1.0, 0};
#define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE)               \
  test_runner_from_bool<OUTPUT_CTYPE, ScalarType::OUTPUT_DTYPE>( \
      test_case_from_bool, result_from_bool);
  ET_FORALL_REALHBF16_TYPES(TEST_FROM_BOOL);
}

TEST_F(OpToTest, NanInfSupported) {
  constexpr auto floatInfinity = std::numeric_limits<float>::infinity();
  std::vector<ToTestCase<double, double>> test_cases = {{
      /*sizes=*/{2, 4},
      /*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
      /*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
  }};

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_static_cast<                                                \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(test_cases);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_FLOATHBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);

#undef TEST_ENTRY
#undef TEST_KERNEL
}

TEST_F(OpToTest, HardcodeFloatConvertInt) {
  // Hardcoded inputs and outputs generated by core PyTorch.
  // clang-format off
  std::vector<float> float_data = {
      -1.47900056838989257812, -4.59277725219726562500,
       2.15365791320800781250, -2.55494546890258789062,
       3.06999135017395019531,  3.27460670471191406250,
      -3.98865103721618652344, -4.81065988540649414062,
       3.67902207374572753906,  3.72226405143737792969,
       0.80567771196365356445,  2.23788332939147949219,
      -0.52035576105117797852, -1.58493483066558837891,
      -0.30919688940048217773};

  std::vector<double> double_data = {
      -1.47900053955270172068, -4.59277735274143061872,
       2.15365796963871947156, -2.55494554556038755422,
       3.06999137834642255029,  3.27460679459944969949,
      -3.98865109243288795682, -4.81065977167646074975,
       3.67902198302105531980,  3.72226414774102742911,
       0.80567768667100203572,  2.23788335717029518435,
      -0.52035578832931150828, -1.58493480710766210251,
      -0.30919688936285893988};
  // clang-format on

  std::vector<exec_aten::Half> half_data;
  std::vector<exec_aten::BFloat16> bf16_data;
  for (auto d : double_data) {
    half_data.emplace_back(d);
    bf16_data.emplace_back(d);
  }

  std::vector<int64_t> int64_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int32_t> int32_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int16_t> int16_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
  std::vector<int8_t> int8_data = {
      -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};

  // Gather all the floating-point data together for easier traversal.
  FloatingTypeToDataMap floating_point_data;
  floating_point_data[typeid(float)] = float_data;
  floating_point_data[typeid(double)] = double_data;
  floating_point_data[typeid(exec_aten::Half)] = half_data;
  floating_point_data[typeid(exec_aten::BFloat16)] = bf16_data;

  // Gather all the integer data together for easier traversal.
  IntTypeToDataMap int_data;
  int_data[typeid(int64_t)] = int64_data;
  int_data[typeid(int32_t)] = int32_data;
  int_data[typeid(int16_t)] = int16_data;
  int_data[typeid(int8_t)] = int8_data;

#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
  test_runner_hardcode_data<                                              \
      INPUT_CTYPE,                                                        \
      ScalarType::INPUT_DTYPE,                                            \
      OUTPUT_CTYPE,                                                       \
      ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data);

#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
  ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);

  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
}

TEST_F(OpToTest, MismatchedSizesDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle mismatched sizes";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros({3, 2, 1, 1});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

// Only contiguous memory is supported; any memory format other than
// MemoryFormat::Contiguous should be rejected. The op is expected to fail
// when given an illegal memory format.
TEST_F(OpToTest, MismatchedMemoryFormatDies) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non contiguous memory formats";
  }
  TensorFactory<ScalarType::Float> tf_in;
  TensorFactory<ScalarType::Float> tf_out;
  Tensor input =
      tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf_out.zeros({3, 1, 1, 2});

  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          static_cast<exec_aten::MemoryFormat>(55),
          out));
  // The memory format may also be omitted entirely (nullopt).
  EXPECT_TENSOR_EQ(
      op_to_copy_out(
          input,
          /*non_blocking=*/false,
          /*memory_format=*/exec_aten::nullopt,
          out),
      input);
}

// Only blocking data transfers are supported.
TEST_F(OpToTest, MismatchedBlockingDie) {
  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
    GTEST_SKIP() << "ATen kernel can handle non blocking data transfer";
  }
  TensorFactory<ScalarType::Int> tf;
  Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
  Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_to_copy_out(
          input,
          /*non_blocking=*/true,
          exec_aten::MemoryFormat::Contiguous,
          out));
}

TEST_F(OpToTest, DynamicShapeUpperBoundSameAsExpected) {
  test_dynamic_shape(
      {2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}

TEST_F(OpToTest, DynamicShapeUpperBoundLargerThanExpected) {
  test_dynamic_shape(
      {10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}
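
// Note on the two tests above (our reading of TensorShapeDynamism, stated as
// an assumption): with DYNAMIC_BOUND the out tensor is allocated at an
// upper-bound shape (e.g. {10, 10}) and the kernel resizes it down to the
// actual result shape ({2, 3}) before writing, so both cases compare equal.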

TEST_F(OpToTest, DynamicShapeUnbound) {
  if (!torch::executor::testing::SupportedFeatures::get()->output_resize) {
    GTEST_SKIP() << "Dynamic shape unbound not supported";
  }
  test_dynamic_shape(
      {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND);
}
509