1 #ifdef USE_VULKAN_API
2
3 // @lint-ignore-every CLANGTIDY
4
5 #include <gtest/gtest.h>
6 #include <ATen/ATen.h>
7 #include <ATen/core/dispatch/Dispatcher.h>
8 #include <ATen/native/vulkan/api/api.h>
9 #include <c10/util/irange.h>
10 #include <c10/util/ArrayRef.h>
11
12 // TODO: These functions should move to a common place.
13
14 namespace {
15
16 #ifdef USE_VULKAN_FP16_INFERENCE
17 constexpr float kTolerance = 1e-2;
18 #else
19 constexpr float kTolerance = 1e-5;
20 #endif
21
22 bool checkRtol(const at::Tensor& diff, float maxTolerance) {
23 if (diff.numel() == 0) {
24 return true;
25 }
26 return diff.abs().max().item<float>() <= maxTolerance;
27 }
28
29 bool checkRtol(const at::Tensor& diff, const std::vector<at::Tensor>& inputs) {
30 if (diff.numel() == 0) {
31 return true;
32 }
33 float maxValue = 0.0f;
34
35 for (const auto& tensor : inputs) {
36 maxValue = fmax(tensor.abs().max().item<float>(), maxValue);
37 }
38
39 return checkRtol(diff, kTolerance * maxValue);
40 }
41
42 bool almostEqual(const at::Tensor& a, const at::Tensor& b) {
43 return checkRtol(a - b, {a, b});
44 }
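// Relative-tolerance helpers used throughout this file: almostEqual(a, b)
// passes when max|a - b| <= kTolerance * max(max|a|, max|b|). For example,
// with kTolerance = 1e-5 and operands whose largest magnitude is 2.0, any
// element-wise difference up to 2e-5 is accepted.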
45
46 bool checkHardShrink(
47 const at::Tensor& ref, const at::Tensor& out, const float clamp_thresh) {
48 float* ref_ptr = ref.data_ptr<float>();
49 float* out_ptr = out.data_ptr<float>();
50 float ref_max = ref.abs().max().item<float>();
51 float out_max = out.abs().max().item<float>();
52 float max_val = std::fmax(ref_max, out_max);
53
54 float abs_clamp_thresh = std::abs(clamp_thresh);
55
56 for (int i = 0; i < ref.numel(); ++i) {
57 float ref_val = ref_ptr[i];
58 float out_val = out_ptr[i];
59
60 float abs_diff = std::abs(ref_val - out_val);
61
62 // For values near the clamp threshold, results may be ambiguous.
63 float distance_from_thresh = std::abs(std::abs(ref_val) - abs_clamp_thresh);
64 if (distance_from_thresh < kTolerance * abs_clamp_thresh) {
65 if (out_val != 0.0f) {
66 if (abs_diff >= kTolerance * max_val) {
67 return false;
68 }
69 }
70 }
71 else if (std::abs(ref_val) < std::abs(abs_clamp_thresh)) {
72 if (out_val != 0.0f) {
73 return false;
74 }
75 }
76 else if (abs_diff >= kTolerance * max_val) {
77 return false;
78 }
79 }
80 return true;
81 }
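// Illustration of the ambiguous case above: with clamp_thresh = 0.5, a
// reference value of roughly 0.5 may legitimately be clamped to 0 or passed
// through on the Vulkan side, so either result is accepted within tolerance.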
82
83 bool checkThreshold(
84 const at::Tensor& ref,
85 const at::Tensor& out,
86 const float clamp_thresh,
87 const float value) {
88 float* ref_ptr = ref.data_ptr<float>();
89 float* out_ptr = out.data_ptr<float>();
90 float ref_max = ref.abs().max().item<float>();
91 float out_max = out.abs().max().item<float>();
92 float max_val = std::fmax(ref_max, out_max);
93
94 for (int i = 0; i < ref.numel(); ++i) {
95 float ref_val = ref_ptr[i];
96 float out_val = out_ptr[i];
97
98 float abs_diff = std::abs(ref_val - out_val);
99 float val_diff = std::abs(out_val - value);
100
101 // For values near the clamp threshold, results may be ambiguous.
102 float distance_from_thresh = std::abs(std::abs(ref_val) - clamp_thresh);
103 if (distance_from_thresh < kTolerance * clamp_thresh) {
104 if (val_diff >= kTolerance * value) {
105 if (abs_diff >= kTolerance * max_val) {
106 return false;
107 }
108 }
109 }
110 else if (std::abs(ref_val) < std::abs(clamp_thresh)) {
111 if (val_diff >= kTolerance * value) {
112 return false;
113 }
114 }
115 else if (abs_diff >= kTolerance * max_val) {
116 return false;
117 }
118 }
119 return true;
120 }
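// Same tolerance policy as checkHardShrink, except that values below the
// threshold are expected to equal `value` (the replacement constant of
// at::threshold) instead of zero.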
121
122 void showRtol(const at::Tensor& a, const at::Tensor& b) {
123 const auto diff = (a - b).abs();
124
125 float maxValue = a.abs().max().item<float>();
126 maxValue = fmax(b.abs().max().item<float>(), maxValue);
127
128 const float maxDiff = maxValue * kTolerance;
129 std::cout << "Max Diff allowed: " << maxDiff << std::endl;
130 if (diff.sizes().size() == 2) {
131 for (const auto y : c10::irange(diff.sizes()[0])) {
132 std::cout << y << ":";
133 for (const auto x : c10::irange(diff.sizes()[1])) {
134 float diff_xy = diff[y][x].item<float>();
135 if (diff_xy > maxDiff) {
136 std::cout << std::setw(5) << x;
137 }
138 else {
139 std::cout << std::setw(5) << " ";
140 }
141 }
142 std::cout << std::endl;
143 }
144 }
145 }
146
147
148 static void gen_allpermutations(std::vector<std::vector<int64_t>>& out, std::vector<int64_t> in, unsigned i) {
149 // generate all permutations of the given dims
150 if (i == in.size()) {
151 out.push_back(in);
152 }
153 else {
154 for (const auto j : c10::irange(i, in.size())) {
155 std::swap(in[i], in[j]);
156 gen_allpermutations(out, in, i + 1);
157 }
158 }
159 }
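// For example, starting from in = {0, 1, 2} and i = 0 this appends
// {0,1,2}, {0,2,1}, {1,0,2}, {1,2,0}, {2,0,1}, {2,1,0} to out.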
160
161 static void gen_all_subsets(
162 std::vector<std::vector<int64_t>>& out,
163 int64_t n,
164 unsigned i,
165 std::vector<int64_t> curr) {
166 // generate all subsets of set {0,...,n - 1} through backtracking
167 if (i == n) {
168 out.push_back(curr);
169 } else {
170 curr.push_back(i);
171 gen_all_subsets(out, n, i + 1, curr);
172 curr.pop_back();
173 gen_all_subsets(out, n, i + 1, curr);
174 }
175 }
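// For example, gen_all_subsets(out, 2, 0, {}) appends {0, 1}, {0}, {1}, {}
// to out, i.e. all 2^n subsets of {0, ..., n - 1}.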
176
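// Compares at::slice on CPU vs. Vulkan for the given tensor size, slicing
// dim, start/end (both optional), and step, and asserts the results match.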
177 static void slice_test(
178 const std::vector<int64_t>& size,
179 int64_t dim,
180 std::optional<int64_t> start,
181 std::optional<int64_t> end,
182 int64_t step) {
183 // Arrange
184 const auto in_cpu = at::rand(size, at::device(at::kCPU).dtype(at::kFloat));
185 const auto in_vulkan = in_cpu.vulkan();
186
187 // Act
188 const auto out_cpu = at::slice(in_cpu, dim, start, end, step);
189 const auto out_vulkan = at::slice(in_vulkan, dim, start, end, step);
190
191 // Assert
192 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
193 if (!check) {
194 showRtol(out_cpu, out_vulkan.cpu());
195 }
196
197 ASSERT_TRUE(check);
198 }
199
200 static void slice_tests(const std::unordered_map<int64_t, std::vector<int64_t>>& dim2sizes) {
201 for (const auto& dim2size : dim2sizes) {
202 slice_test(dim2size.second, dim2size.first, 10, 30, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,10:30:1]
203 slice_test(dim2size.second, dim2size.first, 10, 30, 7); // i.e., 4D tensor's equivalent indexing = [:,:,:,10:30:7]
204 slice_test(dim2size.second, dim2size.first, 10, 50, 2); // i.e., 4D tensor's equivalent indexing = [:,:,:,10:50:2] with end=out of range
205 slice_test(dim2size.second, dim2size.first, -60, 60, 2); // i.e., 4D tensor's equivalent indexing = [:,:,:,-60:60:2] with start/end=out of range
206 slice_test(dim2size.second, dim2size.first, -30, -10, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,-30:-10:1] with negative start/end
207 slice_test(dim2size.second, dim2size.first, 0, INT64_MAX, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,0:9223372036854775807:1] with end=INT64_MAX
208 slice_test(dim2size.second, dim2size.first, -10, INT64_MAX, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,-10:9223372036854775807:1] with negative start and end=INT64_MAX
209 // This triggers a SymInt assert since the [-2^63, -2^62 - 1] range is reserved for packed symints
210 //slice_test(dim2size.second, dim2size.first, INT64_MIN, INT64_MAX, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,-9223372036854775808:9223372036854775807:1] with start=INT64_MIN and end=INT64_MAX
211 slice_test(dim2size.second, dim2size.first, {}, {}, 1); // i.e., 4D tensor's equivalent indexing = [:,:,:,::1] with empty start/end
212 }
213 }
214
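// Compares at::clone on CPU vs. Vulkan for the given size and optional
// memory format.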
215 static void clone_test(const std::vector<int64_t>& size, std::optional<at::MemoryFormat> optional_memory_format) {
216 // Arrange
217 const auto in_cpu = at::rand(size, at::device(at::kCPU).dtype(at::kFloat));
218 const auto in_vulkan = in_cpu.vulkan();
219
220 // Act
221 const auto out_cpu = at::clone(in_cpu, optional_memory_format);
222 const auto out_vulkan = at::clone(in_vulkan, optional_memory_format);
223
224 // Assert
225 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
226 if (!check) {
227 showRtol(out_cpu, out_vulkan.cpu());
228 }
229
230 ASSERT_TRUE(check);
231 }
232
233 template <class... Inputs>
234 inline std::vector<c10::IValue> makeStack(Inputs&&... inputs) {
235 return {std::forward<Inputs>(inputs)...};
236 }
237
238 template <class... Args>
239 inline std::vector<c10::IValue> callOpByHandle(
240 const c10::OperatorHandle& op,
241 Args... args) {
242 auto stack = makeStack(std::forward<Args>(args)...);
243 c10::Dispatcher::singleton().callBoxed(op, &stack);
244 return stack;
245 }
246
247 template <class... Args>
248 inline std::vector<c10::IValue> callOpByName(
249 const char* func_name,
250 const char* overload_name,
251 Args... args) {
252 const std::optional<c10::OperatorHandle> op_handle =
253 c10::Dispatcher::singleton().findSchema({func_name, overload_name});
254 assert(op_handle.has_value());
255 return callOpByHandle(op_handle.value(), std::forward<Args>(args)...);
256 }
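// Example (as used by the prepack tests below):
//   callOpByName("vulkan_prepack::create_conv2d_context", "",
//                weight, bias, stride, padding, dilation, groups,
//                std::nullopt, std::nullopt);
// looks up the operator schema, dispatches it boxed, and returns the
// resulting IValue stack.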
257
258 } // namespace
259
260 namespace {
261
262 class VulkanAPITest : public ::testing::Test {
263 public:
264 void SetUp() {
265 if (!at::is_vulkan_available()) {
266 GTEST_SKIP() << "Vulkan is not available";
267 }
268 #if defined(USE_VULKAN_GPU_DIAGNOSTICS) && defined(__ANDROID__)
269 if (at::native::vulkan::api::context()->op_profiling_enabled()) {
270 at::native::vulkan::api::context()->reset_querypool();
271 }
272 #endif
273 }
274
275 void TearDown() {
276 #if defined(USE_VULKAN_GPU_DIAGNOSTICS) && defined(__ANDROID__)
277 if (at::native::vulkan::api::context()->op_profiling_enabled()) {
278 try {
279 at::native::vulkan::api::context()->querypool().extract_results();
280 at::native::vulkan::api::context()->querypool().print_results();
281 } catch (const std::exception& e) {
282 std::cout << "Could not get querypool results!"
283 << " Reason: " << e.what() << std::endl;
284 }
285 }
286 #endif
287 }
288 };
289
290 TEST_F(VulkanAPITest, zero_size_tensor) {
291 auto cpu = at::rand({1, 0, 0}, at::device(at::kCPU).dtype(at::kFloat));
292 auto vk = cpu.vulkan();
293 auto out_vk = vk.cpu();
294 ASSERT_TRUE(at::equal(out_vk, cpu));
295 }
296
297 TEST_F(VulkanAPITest, zero_size_tensor_numel) {
298 auto vk = at::rand({18, 0, 5}, at::device(at::kVulkan).dtype(at::kFloat));
299 ASSERT_TRUE(vk.numel() == 0);
300 }
301
302 TEST_F(VulkanAPITest, zero_dim_tensor_1) {
303 auto cpu = at::rand({}, at::device(at::kCPU).dtype(at::kFloat));
304 auto vv = cpu.item<float>();
305
306 auto vk = cpu.vulkan();
307 auto out_vk = vk.cpu();
308 ASSERT_TRUE(almostEqual(cpu, out_vk));
309
310 auto vk_vv = out_vk.item<float>();
311 EXPECT_NEAR(vv, vk_vv, kTolerance);
312 }
313
314 TEST_F(VulkanAPITest, zero_dim_tensor_2) {
315 float v = 3.14f;
316 auto cpu = at::zeros({}, at::device(at::kCPU).dtype(at::kFloat)) + v;
317 auto vk = at::zeros({}, at::device(at::kVulkan).dtype(at::kFloat)) + v;
318
319 ASSERT_TRUE(almostEqual(cpu, vk.cpu()));
320 }
321
322 TEST_F(VulkanAPITest, zero_dim_tensor_3) {
323 auto vk = at::zeros({}, at::device(at::kVulkan).dtype(at::kFloat));
324
325 ASSERT_TRUE(vk.cpu().item<float>() == 0.0f);
326 }
327
328 TEST_F(VulkanAPITest, local_scalar_dense) {
329 float v = 8.31f;
330 // Force the zero-dim tensor to a non-zero constant v.
331 auto vk = at::zeros({}, at::device(at::kVulkan).dtype(at::kFloat)) + v;
332 c10::Scalar scalar = at::_local_scalar_dense(vk);
333 EXPECT_NEAR(v, scalar.toFloat(), kTolerance);
334 }
335
336 TEST_F(VulkanAPITest, copy_to_texture) {
337 using namespace at::native::vulkan;
338 at::Tensor test_tensors[] = {
339 // 4D
340 at::rand({7, 17, 134, 213}, at::TensorOptions(at::kCPU).dtype(at::kFloat)),
341 // 3D
342 at::rand({67, 134, 213}, at::TensorOptions(at::kCPU).dtype(at::kFloat)),
343 // 2D
344 at::rand({229, 213}, at::TensorOptions(at::kCPU).dtype(at::kFloat)),
345 // 1D
346 at::rand({1902}, at::TensorOptions(at::kCPU).dtype(at::kFloat)),
347 };
348
349 for (auto in_cpu : test_tensors) {
350 at::Tensor in_vk_copied = in_cpu.vulkan();
351 at::Tensor out_copied = in_vk_copied.cpu();
352
353 const auto check_copy = almostEqual(out_copied, in_cpu);
354
355 if(!check_copy) {
356 std::cout << "Copy failed on size " << in_cpu.sizes()
357 << "with dtype" << in_cpu.dtype() << std::endl;
358 }
359
360 ASSERT_TRUE(check_copy);
361 }
362 }
363
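// Round-trips a random bool tensor CPU -> Vulkan -> CPU and checks for exact
// equality (no tolerance is needed for bool values).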
364 void test_copy_to_texture_bool(const at::IntArrayRef input_shape) {
365 using namespace at::native::vulkan;
366 auto cpu = at::randint(0, 2, input_shape, at::TensorOptions(at::kCPU).dtype(at::kBool));
367 auto in_vulkan = cpu.vulkan();
368
369 auto out_vulkan = in_vulkan.cpu();
370 auto check = at::equal(cpu, out_vulkan.cpu());
371
372 if (!check) {
373 std::cout << "Copy texture to bool failed on input_shape " << input_shape << std::endl;
374 }
375 ASSERT_TRUE(check);
376 }
377
378 TEST_F(VulkanAPITest, copy_to_texture_bool_mul4_hw) {
379 // Uses the shader: image_to_nchw_quantized_mul4 ((H * W) % 4 == 0)
380 // ch % 4 != 0, ch < 4
381 test_copy_to_texture_bool({5, 1, 2, 2});
382 test_copy_to_texture_bool({17, 2, 4, 2});
383 test_copy_to_texture_bool({9, 3, 3, 8});
384
385 // ch % 4 != 0, ch > 5
386 test_copy_to_texture_bool({7, 17, 4, 8});
387 test_copy_to_texture_bool({8, 6, 2, 4});
388 test_copy_to_texture_bool({13, 31, 4, 57});
389
390 // 3d, 2d, 1d
391 test_copy_to_texture_bool({17, 31, 4});
392 test_copy_to_texture_bool({64, 16});
393 test_copy_to_texture_bool({8});
394 }
395
396 TEST_F(VulkanAPITest, copy_to_texture_bool_mul4_chw) {
397 // Uses the shader: image_to_nchw_quantized_mul4 ((H * W) % 4 == 0)
398 // ch % 4 == 0
399 test_copy_to_texture_bool({5, 16, 2, 16});
400 test_copy_to_texture_bool({8, 8, 2, 2});
401 test_copy_to_texture_bool({16, 31, 4});
402 }
403
404 TEST_F(VulkanAPITest, copy_to_texture_bool) {
405 // Uses the shader: image_to_nchw_uint ((H * W) % 4 != 0)
406 test_copy_to_texture_bool({13, 1, 3, 5});
407 test_copy_to_texture_bool({13, 7, 1, 5});
408 test_copy_to_texture_bool({13, 8, 2, 5});
409 test_copy_to_texture_bool({13, 31, 2, 57});
410
411 test_copy_to_texture_bool({67, 19, 7});
412 test_copy_to_texture_bool({229, 213});
413 test_copy_to_texture_bool({1902});
414 }
415
416 TEST_F(VulkanAPITest, adaptive_avg_pool2d) {
417 c10::InferenceMode mode;
418
419 const auto in_cpu = at::rand({5, 7, 47, 31}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
420 const auto out_cpu = at::adaptive_avg_pool2d(in_cpu, {3, 3});
421 const auto out_vulkan = at::adaptive_avg_pool2d(in_cpu.vulkan(), {3, 3});
422
423 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
424 if (!check) {
425 showRtol(out_cpu, out_vulkan.cpu());
426 }
427
428 ASSERT_TRUE(check);
429 }
430
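// Shared helper for the add tests below: computes at::add(input, other, alpha)
// on CPU and on Vulkan (both operands uploaded) and asserts the results agree
// within kTolerance. The broadcast variants only differ in the shapes passed.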
431 void test_add(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape, float alpha) {
432 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
433 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat));
434
435 const auto in_vulkan = in_cpu.vulkan();
436 const auto other_vulkan = other_cpu.vulkan();
437
438 const auto out_cpu = at::add(in_cpu, other_cpu, alpha);
439 const auto out_vulkan = at::add(in_vulkan, other_vulkan, alpha);
440
441 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
442 if (!check) {
443 showRtol(out_cpu, out_vulkan.cpu());
444 }
445
446 ASSERT_TRUE(check);
447 }
448
449 TEST_F(VulkanAPITest, add_invalid_inputs) {
450 // Incompatible dimensions for broadcasting for binary elementwise op
451 auto in_cpu = at::rand({2, 3, 4, 5}, at::device(at::kCPU).dtype(at::kFloat));
452 auto other_cpu = at::rand({2, 4, 4, 5}, at::device(at::kCPU).dtype(at::kFloat));
453
454 EXPECT_THROW(at::add(in_cpu.vulkan(), other_cpu.vulkan(), 1.0f), ::std::exception);
455 }
456
457 TEST_F(VulkanAPITest, add) {
458 test_add({2, 3}, {2, 3}, 1.0f);
459 test_add({11, 7, 139, 109}, {11, 7, 139, 109}, 2.1f);
460 }
461
462 TEST_F(VulkanAPITest, add_broadcast0) {
463 test_add({3, 5, 179, 221}, {3, 5, 1, 1}, 1.8f);
464 }
465
466 TEST_F(VulkanAPITest, add_broadcast1) {
467 test_add({3, 5, 179, 221}, {3, 5, 1, 221}, 1.8f);
468 }
469
470 TEST_F(VulkanAPITest, add_broadcast2) {
471 test_add({3, 4, 179, 221}, {4, 1, 1}, 2.5f);
472 }
473
474 TEST_F(VulkanAPITest, add_broadcast3) {
475 test_add({3, 4, 41, 53}, {1, 1, 41, 53}, 2.5f);
476 }
477
478 TEST_F(VulkanAPITest, add_broadcast4) {
479 test_add({3, 4, 41, 1}, {1, 41, 53}, 2.5f);
480 }
481
482 TEST_F(VulkanAPITest, add_broadcast5) {
483 test_add({2, 1, 7, 1}, {1, 5, 1, 4}, 1.2f);
484 }
485
486 TEST_F(VulkanAPITest, add_broadcast6) {
487 test_add({1, 15, 5, 4}, {21, 1, 5, 4}, 1.8f);
488 }
489
490 TEST_F(VulkanAPITest, add_zero_dim) {
491 test_add({2, 6, 5, 6}, {}, 1.5f);
492 }
493
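// Like test_add, but `other` stays on the CPU as an integer tensor instead of
// being uploaded to Vulkan, exercising the mixed CPU/Vulkan dispatch path.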
494 void test_add_other_cpu_int(
495 const at::IntArrayRef input_shape,
496 const at::IntArrayRef other_shape,
497 float alpha) {
498 const auto in_cpu =
499 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
500 const auto other_cpu =
501 (at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) * 100)
502 .to(at::kInt);
503
504 const auto in_vulkan = in_cpu.vulkan();
505
506 const auto out_cpu = at::add(in_cpu, other_cpu, alpha);
507 const auto out_vulkan = at::add(in_vulkan, other_cpu, alpha);
508
509 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
510 if (!check) {
511 showRtol(out_cpu, out_vulkan.cpu());
512 }
513
514 ASSERT_TRUE(check);
515 }
516
517 TEST_F(VulkanAPITest, add_other_cpu_int) {
518 test_add_other_cpu_int({2, 3}, {2, 3}, 1.0f);
519 test_add_other_cpu_int({11, 7, 139, 109}, {11, 7, 139, 109}, 2.1f);
520 }
521
522 TEST_F(VulkanAPITest, add_broadcast0_other_cpu_int) {
523 test_add_other_cpu_int({3, 5, 179, 221}, {3, 5, 1, 1}, 1.8f);
524 }
525
526 TEST_F(VulkanAPITest, add_other_cpu_unsupported_type_should_fail) {
527 const auto in_cpu = at::rand({2,2,2}, at::device(at::kCPU).dtype(at::kFloat));
528
529 const auto other_cpu =
530 at::zeros({2, 2, 2}, at::device(at::kCPU).dtype(at::kComplexFloat));
531
532 EXPECT_THROW(at::add(in_cpu.vulkan(), other_cpu.vulkan(), 1.0f), ::std::exception);
533 }
534
535 TEST_F(VulkanAPITest, add_) {
536 auto a_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
537 auto a_vulkan = a_cpu.vulkan();
538
539 const auto b_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
540 const auto b_vulkan = b_cpu.vulkan();
541
542 a_cpu.add_(b_cpu, 2.1f);
543 a_vulkan.add_(b_vulkan, 2.1f);
544
545 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
546 if (!check) {
547 showRtol(a_cpu, a_vulkan.cpu());
548 }
549
550 ASSERT_TRUE(check);
551 }
552
553 TEST_F(VulkanAPITest, add_broadcast0_) {
554 auto a_cpu = at::rand({16, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
555 auto a_vulkan = a_cpu.vulkan();
556
557 const auto b_cpu = at::rand({16, 17, 29, 1}, at::device(at::kCPU).dtype(at::kFloat));
558 const auto b_vulkan = b_cpu.vulkan();
559
560 a_cpu.add_(b_cpu, 2.1f);
561 a_vulkan.add_(b_vulkan, 2.1f);
562
563 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
564 if (!check) {
565 showRtol(a_cpu, a_vulkan.cpu());
566 }
567
568 ASSERT_TRUE(check);
569 }
570
571 TEST_F(VulkanAPITest, add_other_cpu_int_) {
572 std::vector<int64_t> input_shape{12, 17, 29, 33};
573 const auto in_cpu =
574 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
575 const auto other_cpu =
576 (at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) * 100)
577 .to(at::kInt);
578
579 const auto in_vulkan = in_cpu.vulkan();
580
581 float alpha = -8.31f;
582 in_cpu.add(other_cpu, alpha);
583 in_vulkan.add(other_cpu, alpha);
584
585 const auto check = almostEqual(in_cpu, in_vulkan.cpu());
586 if (!check) {
587 showRtol(in_cpu, in_vulkan.cpu());
588 }
ASSERT_TRUE(check);
589 }
590
591 TEST_F(VulkanAPITest, add_broadcast1_) {
592 auto a_cpu = at::rand({3, 8, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
593 auto a_vulkan = a_cpu.vulkan();
594
595 const auto b_cpu = at::rand({3, 8, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
596 const auto b_vulkan = b_cpu.vulkan();
597
598 a_cpu.add_(b_cpu, 2.1f);
599 a_vulkan.add_(b_vulkan, 2.1f);
600
601 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
602 if (!check) {
603 showRtol(a_cpu, a_vulkan.cpu());
604 }
605
606 ASSERT_TRUE(check);
607 }
608
609 TEST_F(VulkanAPITest, add_scalar) {
610 const auto a_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
611 const auto a_vulkan = a_cpu.vulkan();
612
613 const float b_scalar = 3.1415f;
614
615 const auto c_cpu = at::add(a_cpu, b_scalar, 2.1f);
616 const auto c_vulkan = at::add(a_vulkan, b_scalar, 2.1f);
617
618 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
619 if (!check) {
620 showRtol(c_cpu, c_vulkan.cpu());
621 }
622
623 ASSERT_TRUE(check);
624 }
625
626 TEST_F(VulkanAPITest, add_scalar_) {
627 auto a_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
628 auto a_vulkan = a_cpu.vulkan();
629
630 const float b_scalar = 3.1415f;
631
632 a_cpu.add_(b_scalar, 2.1f);
633 a_vulkan.add_(b_scalar, 2.1f);
634
635 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
636 if (!check) {
637 showRtol(a_cpu, a_vulkan.cpu());
638 }
639
640 ASSERT_TRUE(check);
641 }
642
643 TEST_F(VulkanAPITest, add_scalar_wrapped) {
644 if (!at::is_vulkan_available()) {
645 return;
646 }
647
648 const auto a_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
649 const auto a_vulkan = a_cpu.vulkan();
650
651 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
652
653 const auto c_cpu = at::add(a_cpu, b_scalar, 2.1f);
654 const auto c_vulkan = at::add(a_vulkan, b_scalar, 2.1f);
655
656 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
657 if (!check) {
658 showRtol(c_cpu, c_vulkan.cpu());
659 }
660
661 ASSERT_TRUE(check);
662 }
663
664 TEST_F(VulkanAPITest, add_scalar_wrapped_) {
665 if (!at::is_vulkan_available()) {
666 return;
667 }
668
669 auto a_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
670 auto a_vulkan = a_cpu.vulkan();
671
672 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
673
674 a_cpu.add_(b_scalar, 2.1f);
675 a_vulkan.add_(b_scalar, 2.1f);
676
677 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
678 if (!check) {
679 showRtol(a_cpu, a_vulkan.cpu());
680 }
681
682 ASSERT_TRUE(check);
683 }
684
685 TEST_F(VulkanAPITest, add_to_scalar_wrapped) {
686 if (!at::is_vulkan_available()) {
687 return;
688 }
689
690 const auto a = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
691
692 const auto b_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
693 const auto b_vulkan = b_cpu.vulkan();
694
695 const auto c_cpu = at::add(a, b_cpu, 2.1f);
696 const auto c_vulkan = at::add(a, b_vulkan, 2.1f);
697
698 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
699 if (!check) {
700 showRtol(c_cpu, c_vulkan.cpu());
701 }
702
703 ASSERT_TRUE(check);
704 }
705
706 TEST_F(VulkanAPITest, addmm) {
707 constexpr float alpha = 2.1f;
708 constexpr float beta = 103.24;
709
710 const auto bias_cpu = at::rand({179, 163}, at::device(at::kCPU).dtype(at::kFloat));
711 const auto m1_cpu = at::rand({179, 67}, at::device(at::kCPU).dtype(at::kFloat));
712 const auto m2_cpu = at::rand({67, 163}, at::device(at::kCPU).dtype(at::kFloat));
713 const auto out_cpu = at::addmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
714
715 const auto m1_vulkan = m1_cpu.vulkan();
716 const auto out_vulkan = at::addmm(bias_cpu, m1_vulkan, m2_cpu, beta, alpha);
717
718 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
719 if (!check) {
720 showRtol(out_cpu, out_vulkan.cpu());
721 }
722
723 ASSERT_TRUE(check);
724 }
725
726 TEST_F(VulkanAPITest, addmm_expand) {
727 constexpr float alpha = 2.1f;
728 constexpr float beta = 103.24;
729
730 const auto bias_cpu = at::rand({1000}, at::device(at::kCPU).dtype(at::kFloat));
731 const auto m1_cpu = at::rand({1, 1280}, at::device(at::kCPU).dtype(at::kFloat));
732 const auto m2_cpu = at::rand({1280, 1000}, at::device(at::kCPU).dtype(at::kFloat));
733 const auto out_cpu = at::addmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
734
735 const auto m1_vulkan = m1_cpu.vulkan();
736 const auto out_vulkan = at::addmm(bias_cpu, m1_vulkan, m2_cpu, beta, alpha);
737
738 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
739 if (!check) {
740 showRtol(out_cpu, out_vulkan.cpu());
741 }
742
743 ASSERT_TRUE(check);
744 }
745
746 TEST_F(VulkanAPITest, addmm_expand2) {
747 constexpr float alpha = 2.1f;
748 constexpr float beta = 103.24;
749
750 const auto bias_cpu = at::rand({9}, at::device(at::kCPU).dtype(at::kFloat));
751 const auto m1_cpu = at::rand({17, 6}, at::device(at::kCPU).dtype(at::kFloat));
752 const auto m2_cpu = at::rand({6, 9}, at::device(at::kCPU).dtype(at::kFloat));
753 const auto out_cpu = at::addmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
754
755 const auto m1_vulkan = m1_cpu.vulkan();
756 const auto out_vulkan = at::addmm(bias_cpu, m1_vulkan, m2_cpu, beta, alpha);
757
758 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
759 if (!check) {
760 showRtol(out_cpu, out_vulkan.cpu());
761 }
762
763 ASSERT_TRUE(check);
764 }
765
766 TEST_F(VulkanAPITest, addmm_error_bias) {
767 constexpr float alpha = 2.1f;
768 constexpr float beta = 103.24;
769
770 // mismatched bias size (should be 1-dim or {17, 9})
771 const auto bias_cpu = at::rand({5, 5}, at::device(at::kCPU).dtype(at::kFloat));
772 const auto m1_cpu = at::rand({17, 6}, at::device(at::kCPU).dtype(at::kFloat));
773 const auto m2_cpu = at::rand({6, 9}, at::device(at::kCPU).dtype(at::kFloat));
774 const auto m1_vulkan = m1_cpu.vulkan();
775 EXPECT_THROW(at::addmm(bias_cpu, m1_vulkan, m2_cpu, beta, alpha), ::std::exception);
776 }
777
778 TEST_F(VulkanAPITest, avg_pool2d) {
779 const auto in_cpu = at::rand({3, 19, 43, 79}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
780 const auto out_cpu = at::avg_pool2d(in_cpu, {5, 3}, {1, 2}, {2, 0}, true);
781 const auto out_vulkan = at::avg_pool2d(in_cpu.vulkan(), {5, 3}, {1, 2}, {2, 0}, true);
782
783 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
784 if (!check) {
785 showRtol(out_cpu, out_vulkan.cpu());
786 }
787
788 ASSERT_TRUE(check);
789 }
790
791 TEST_F(VulkanAPITest, DISABLED_batch_norm_invalid_inputs) {
792 c10::InferenceMode mode;
793
794 // Act: Vulkan batchnorm only supports evaluation mode
795 EXPECT_THROW({
796 at::batch_norm(
797 at::rand({3, 8, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
798 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
799 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
800 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
801 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
802 true,
803 0.1,
804 1e-05,
805 false);
806 }, ::std::exception);
807
808 // Act: Vulkan batchnorm expects 4-dim input
809 EXPECT_THROW({
810 at::batch_norm(
811 at::rand({3, 8, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
812 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
813 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
814 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
815 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
816 false,
817 0.1,
818 1e-05,
819 false);
820 }, ::std::exception);
821
822 // Act: Vulkan batchnorm expects 4-dim input
823 EXPECT_THROW({
824 at::batch_norm(
825 at::rand({2, 8, 3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
826 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
827 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
828 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
829 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
830 false,
831 0.1,
832 1e-05,
833 false);
834 }, ::std::exception);
835
836 // Act: Vulkan batchnorm expects channel dim to be multiple of 4
837 EXPECT_THROW({
838 at::batch_norm(
839 at::rand({4, 7, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
840 at::rand({7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
841 at::rand({7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
842 at::rand({7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
843 at::rand({7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
844 false,
845 0.1,
846 1e-05,
847 false);
848 }, ::std::exception);
849
850 // Act: weight tensor contains incorrect number of elements
851 EXPECT_THROW({
852 at::batch_norm(
853 at::rand({4, 8, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
854 at::rand({12}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
855 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
856 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
857 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
858 false,
859 0.1,
860 1e-05,
861 false);
862 }, ::std::exception);
863
864 // Act: bias tensor contains incorrect number of elements
865 EXPECT_THROW({
866 at::batch_norm(
867 at::rand({4, 8, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
868 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
869 at::rand({12}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
870 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
871 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
872 false,
873 0.1,
874 1e-05,
875 false);
876 }, ::std::exception);
877
878 // Act: running mean tensor contains incorrect number of elements
879 EXPECT_THROW({
880 at::batch_norm(
881 at::rand({4, 8, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
882 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
883 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
884 at::rand({12}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
885 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
886 false,
887 0.1,
888 1e-05,
889 false);
890 }, ::std::exception);
891
892 // Act: running var tensor contains incorrect number of elements
893 EXPECT_THROW({
894 at::batch_norm(
895 at::rand({4, 8, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
896 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
897 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
898 at::rand({8}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
899 at::rand({12}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
900 false,
901 0.1,
902 1e-05,
903 false);
904 }, ::std::exception);
905 }
906
907 TEST_F(VulkanAPITest, batch_norm_small) {
908 c10::InferenceMode mode;
909
910 const auto input_cpu = at::rand({1, 4, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
911 const auto input_vulkan = input_cpu.vulkan();
912
913 const auto weight_cpu = at::rand({4}, at::device(at::kCPU).dtype(at::kFloat));
914 const auto weight_vulkan = weight_cpu.vulkan();
915
916 const auto bias_cpu = at::rand({4}, at::device(at::kCPU).dtype(at::kFloat));
917 const auto bias_vulkan = bias_cpu.vulkan();
918
919 const auto running_mean_cpu = at::rand({4}, at::device(at::kCPU).dtype(at::kFloat));
920 const auto running_mean_vulkan = running_mean_cpu.vulkan();
921
922 const auto running_var_cpu = at::rand({4}, at::device(at::kCPU).dtype(at::kFloat));
923 const auto running_var_vulkan = running_var_cpu.vulkan();
924
925 const auto output_cpu = at::batch_norm(input_cpu, weight_cpu, bias_cpu, running_mean_cpu, running_var_cpu, false, 0.1, 1e-05, false);
926 const auto output_vulkan = at::batch_norm(input_vulkan, weight_vulkan, bias_vulkan, running_mean_vulkan, running_var_vulkan, false, 0.1, 1e-05, false);
927
928 const auto check = almostEqual(output_cpu, output_vulkan.cpu());
929 if (!check) {
930 showRtol(output_cpu, output_vulkan.cpu());
931 }
932
933 ASSERT_TRUE(check);
934 }
935
936 TEST_F(VulkanAPITest, batch_norm_medium) {
937 c10::InferenceMode mode;
938
939 const auto input_cpu = at::rand({3, 8, 5, 7}, at::device(at::kCPU).dtype(at::kFloat));
940 const auto input_vulkan = input_cpu.vulkan();
941
942 const auto weight_cpu = at::rand({8}, at::device(at::kCPU).dtype(at::kFloat));
943 const auto weight_vulkan = weight_cpu.vulkan();
944
945 const auto bias_cpu = at::rand({8}, at::device(at::kCPU).dtype(at::kFloat));
946 const auto bias_vulkan = bias_cpu.vulkan();
947
948 const auto running_mean_cpu = at::rand({8}, at::device(at::kCPU).dtype(at::kFloat));
949 const auto running_mean_vulkan = running_mean_cpu.vulkan();
950
951 const auto running_var_cpu = at::rand({8}, at::device(at::kCPU).dtype(at::kFloat));
952 const auto running_var_vulkan = running_var_cpu.vulkan();
953
954 const auto output_cpu = at::batch_norm(input_cpu, weight_cpu, bias_cpu, running_mean_cpu, running_var_cpu, false, 0.1, 1e-05, false);
955 const auto output_vulkan = at::batch_norm(input_vulkan, weight_vulkan, bias_vulkan, running_mean_vulkan, running_var_vulkan, false, 0.1, 1e-05, false);
956
957 const auto check = almostEqual(output_cpu, output_vulkan.cpu());
958 if (!check) {
959 showRtol(output_cpu, output_vulkan.cpu());
960 }
961
962 ASSERT_TRUE(check);
963 }
964
965 TEST_F(VulkanAPITest, batch_norm_large) {
966 c10::InferenceMode mode;
967
968
969 const auto input_cpu = at::rand({11, 52, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
970 const auto input_vulkan = input_cpu.vulkan();
971
972 const auto weight_cpu = at::rand({52}, at::device(at::kCPU).dtype(at::kFloat));
973 const auto weight_vulkan = weight_cpu.vulkan();
974
975 const auto bias_cpu = at::rand({52}, at::device(at::kCPU).dtype(at::kFloat));
976 const auto bias_vulkan = bias_cpu.vulkan();
977
978 const auto running_mean_cpu = at::rand({52}, at::device(at::kCPU).dtype(at::kFloat));
979 const auto running_mean_vulkan = running_mean_cpu.vulkan();
980
981 const auto running_var_cpu = at::rand({52}, at::device(at::kCPU).dtype(at::kFloat));
982 const auto running_var_vulkan = running_var_cpu.vulkan();
983
984 const auto output_cpu = at::batch_norm(input_cpu, weight_cpu, bias_cpu, running_mean_cpu, running_var_cpu, false, 0.1, 1e-05, false);
985 const auto output_vulkan = at::batch_norm(input_vulkan, weight_vulkan, bias_vulkan, running_mean_vulkan, running_var_vulkan, false, 0.1, 1e-05, false);
986
987 const auto check = almostEqual(output_cpu, output_vulkan.cpu());
988 if (!check) {
989 showRtol(output_cpu, output_vulkan.cpu());
990 }
991
992 ASSERT_TRUE(check);
993 }
994
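// Shared helper for the baddbmm tests: checks beta * bias + alpha * (m1 @ m2)
// on CPU against the Vulkan result (m1 and m2 uploaded, bias kept on CPU).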
995 void test_baddbmm(
996 at::Tensor bias_cpu,
997 at::Tensor m1_cpu,
998 at::Tensor m2_cpu,
999 float beta,
1000 float alpha) {
1001 const auto out_cpu = at::baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1002
1003 const auto m1_vulkan = m1_cpu.vulkan();
1004 const auto out_vulkan =
1005 at::baddbmm(bias_cpu, m1_vulkan, m2_cpu.vulkan(), beta, alpha);
1006
1007 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
1008 if (!check) {
1009 showRtol(out_cpu, out_vulkan.cpu());
1010 }
1011
1012 ASSERT_TRUE(check);
1013 }
1014
1015 TEST_F(VulkanAPITest, baddbmm) {
1016 constexpr float alpha = 1.5f;
1017 constexpr float beta = 2.0f;
1018 int batch = 9;
1019 int n = 10;
1020 int p = 41;
1021 int m = 13;
1022
1023 const auto bias_cpu =
1024 at::rand({batch, n, m}, at::device(at::kCPU).dtype(at::kFloat));
1025 const auto m1_cpu =
1026 at::rand({batch, n, p}, at::device(at::kCPU).dtype(at::kFloat));
1027 const auto m2_cpu =
1028 at::rand({batch, p, m}, at::device(at::kCPU).dtype(at::kFloat));
1029
1030 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1031 }
1032
1033 TEST_F(VulkanAPITest, baddbmm_small) {
1034 constexpr float alpha = -1.0f;
1035 constexpr float beta = 2.0f;
1036 int batch = 3;
1037 int n = 3;
1038 int p = 5;
1039 int m = 4;
1040
1041 const auto bias_cpu_0 =
1042 at::rand({1, n, m}, at::device(at::kCPU).dtype(at::kFloat));
1043 const auto bias_cpu_1 =
1044 at::ones({1, n, m}, at::device(at::kCPU).dtype(at::kFloat));
1045 const auto bias_cpu_2 =
1046 at::rand({1, n, m}, at::device(at::kCPU).dtype(at::kFloat)) * -1;
1047 const auto bias_cpu = at::cat({bias_cpu_0, bias_cpu_1, bias_cpu_2}, 0);
1048
1049 const auto m1_cpu =
1050 at::rand({batch, n, p}, at::device(at::kCPU).dtype(at::kFloat));
1051 const auto m2_cpu =
1052 at::rand({batch, p, m}, at::device(at::kCPU).dtype(at::kFloat));
1053
1054 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1055 }
1056
1057 TEST_F(VulkanAPITest, baddbmm_one) {
1058 constexpr float alpha = 2.1f;
1059 constexpr float beta = 103.24;
1060
1061 const auto bias_cpu =
1062 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1063 const auto m1_cpu =
1064 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1065 const auto m2_cpu =
1066 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1067
1068 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1069 }
1070
1071 TEST_F(VulkanAPITest, baddbmm_bais_error) {
1072 constexpr float alpha = 2.1f;
1073 constexpr float beta = 103.24;
1074
1075 // mismatched dimensions of batch sizes.
1076 const auto bias_cpu =
1077 at::rand({200, 179, 163}, at::device(at::kCPU).dtype(at::kFloat));
1078 const auto m1_cpu =
1079 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1080 const auto m2_cpu =
1081 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1082 const auto m1_vulkan = m1_cpu.vulkan();
1083 EXPECT_THROW(
1084 at::baddbmm(bias_cpu, m1_vulkan, m2_cpu, beta, alpha), ::std::exception);
1085 }
1086
1087 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_batch) {
1088 constexpr float alpha = 1.5f;
1089 constexpr float beta = 2.0f;
1090 const auto bias_cpu =
1091 at::rand({1, 179, 163}, at::device(at::kCPU).dtype(at::kFloat));
1092 const auto m1_cpu =
1093 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1094 const auto m2_cpu =
1095 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1096 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1097 }
1098
1099 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_height) {
1100 constexpr float alpha = 1.5f;
1101 constexpr float beta = 2.0f;
1102 const auto bias_cpu =
1103 at::rand({150, 1, 163}, at::device(at::kCPU).dtype(at::kFloat));
1104 const auto m1_cpu =
1105 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1106 const auto m2_cpu =
1107 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1108 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1109 }
1110
1111 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_width) {
1112 constexpr float alpha = 1.5f;
1113 constexpr float beta = 2.0f;
1114 const auto bias_cpu =
1115 at::rand({150, 179, 1}, at::device(at::kCPU).dtype(at::kFloat));
1116 const auto m1_cpu =
1117 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1118 const auto m2_cpu =
1119 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1120 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1121 }
1122
1123 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_batch_width) {
1124 constexpr float alpha = 1.5f;
1125 constexpr float beta = 2.0f;
1126 const auto bias_cpu =
1127 at::rand({1, 179, 1}, at::device(at::kCPU).dtype(at::kFloat));
1128 const auto m1_cpu =
1129 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1130 const auto m2_cpu =
1131 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1132 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1133 }
1134
1135 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_batch_height) {
1136 constexpr float alpha = 1.5f;
1137 constexpr float beta = 2.0f;
1138 const auto bias_cpu =
1139 at::rand({1, 1, 163}, at::device(at::kCPU).dtype(at::kFloat));
1140 const auto m1_cpu =
1141 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1142 const auto m2_cpu =
1143 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1144 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1145 }
1146
1147 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_one) {
1148 constexpr float alpha = 1.5f;
1149 constexpr float beta = 2.0f;
1150 const auto bias_cpu =
1151 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1152 const auto m1_cpu =
1153 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1154 const auto m2_cpu =
1155 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1156 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1157 }
1158
1159 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_reduce_batch) {
1160 constexpr float alpha = 1.5f;
1161 constexpr float beta = 2.0f;
1162 const auto bias_cpu =
1163 at::rand({179, 163}, at::device(at::kCPU).dtype(at::kFloat));
1164 const auto m1_cpu =
1165 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1166 const auto m2_cpu =
1167 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1168 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1169 }
1170
1171 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_reduce_batch1) {
1172 constexpr float alpha = 1.5f;
1173 constexpr float beta = 2.0f;
1174 const auto bias_cpu =
1175 at::rand({179, 1}, at::device(at::kCPU).dtype(at::kFloat));
1176 const auto m1_cpu =
1177 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1178 const auto m2_cpu =
1179 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1180 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1181 }
1182
1183 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_reduce_batch2) {
1184 constexpr float alpha = 1.5f;
1185 constexpr float beta = 2.0f;
1186 const auto bias_cpu =
1187 at::rand({1, 163}, at::device(at::kCPU).dtype(at::kFloat));
1188 const auto m1_cpu =
1189 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1190 const auto m2_cpu =
1191 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1192 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1193 }
1194
1195 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_reduce_batch_height) {
1196 constexpr float alpha = 1.5f;
1197 constexpr float beta = 2.0f;
1198 const auto bias_cpu = at::rand({163}, at::device(at::kCPU).dtype(at::kFloat));
1199 const auto m1_cpu =
1200 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1201 const auto m2_cpu =
1202 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1203 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1204 }
1205
1206 TEST_F(VulkanAPITest, baddbmm_bias_boardcast_reduce_all) {
1207 constexpr float alpha = 1.5f;
1208 constexpr float beta = 2.0f;
1209 const auto bias_cpu = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
1210 const auto m1_cpu =
1211 at::rand({150, 179, 67}, at::device(at::kCPU).dtype(at::kFloat));
1212 const auto m2_cpu =
1213 at::rand({150, 67, 163}, at::device(at::kCPU).dtype(at::kFloat));
1214 test_baddbmm(bias_cpu, m1_cpu, m2_cpu, beta, alpha);
1215 }
1216
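// Helper for the matmul tests; when m2_use_vulkan is true the second operand
// is uploaded as well, so both the CPU-weight and Vulkan-weight paths of
// at::matmul are covered.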
1217 void test_matmul(
1218 at::Tensor m1_cpu,
1219 at::Tensor m2_cpu,
1220 bool m2_use_vulkan = false) {
1221 c10::InferenceMode mode;
1222 const auto out_cpu = at::matmul(m1_cpu, m2_cpu);
1223 auto out_vk =
1224 at::matmul(m1_cpu.vulkan(), m2_use_vulkan ? m2_cpu.vulkan() : m2_cpu);
1225
1226 const auto check = almostEqual(out_cpu, out_vk.cpu());
1227 if (!check) {
1228 showRtol(out_cpu, out_vk.cpu());
1229 }
1230
1231 ASSERT_TRUE(check);
1232 }
1233
1234 TEST_F(VulkanAPITest, DISABLED_matmul_3d_weight_vulkan) {
1235 // This will call at::bmm. Crashes for an unknown reason.
1236 const auto m1_cpu =
1237 at::rand({13, 23, 45}, at::device(at::kCPU).dtype(at::kFloat));
1238 const auto m2_cpu =
1239 at::rand({13, 45, 26}, at::device(at::kCPU).dtype(at::kFloat));
1240 test_matmul(m1_cpu, m2_cpu, true);
1241 }
1242
1243 TEST_F(VulkanAPITest, DISABLED_matmul_3d_weight_cpu) {
1244 // This will call at::bmm. Crashes for an unknown reason.
1245 const auto m1_cpu =
1246 at::rand({13, 23, 45}, at::device(at::kCPU).dtype(at::kFloat));
1247 const auto m2_cpu =
1248 at::rand({13, 45, 26}, at::device(at::kCPU).dtype(at::kFloat));
1249 test_matmul(m1_cpu, m2_cpu);
1250 }
1251
1252 TEST_F(VulkanAPITest, matmul_2d_weight_vulkan) {
1253 // This will call at::mm
1254 const auto m1_cpu = at::rand({7, 42}, at::device(at::kCPU).dtype(at::kFloat));
1255 const auto m2_cpu = at::rand({42, 9}, at::device(at::kCPU).dtype(at::kFloat));
1256 test_matmul(m1_cpu, m2_cpu, true);
1257 }
1258
1259 TEST_F(VulkanAPITest, matmul_2d_weight_cpu) {
1260 // This will call at::mm
1261 const auto m1_cpu =
1262 at::rand({23, 45}, at::device(at::kCPU).dtype(at::kFloat));
1263 const auto m2_cpu =
1264 at::rand({45, 26}, at::device(at::kCPU).dtype(at::kFloat));
1265 test_matmul(m1_cpu, m2_cpu);
1266 }
1267
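// Same pattern as test_matmul, but through Tensor::bmm (batched matrix
// multiply with matching batch dimensions).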
1268 void test_bmm(
1269 at::Tensor m1_cpu,
1270 at::Tensor m2_cpu,
1271 bool m2_use_vulkan = false) {
1272 const auto out_cpu = m1_cpu.bmm(m2_cpu);
1273
1274 const auto m1_vulkan = m1_cpu.vulkan();
1275 const auto out_vulkan =
1276 m1_vulkan.bmm(m2_use_vulkan ? m2_cpu.vulkan() : m2_cpu);
1277
1278 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
1279 if (!check) {
1280 showRtol(out_cpu, out_vulkan.cpu());
1281 }
1282
1283 ASSERT_TRUE(check);
1284 }
1285
1286 TEST_F(VulkanAPITest, bmm_vulkan_small) {
1287 const auto m1_cpu =
1288 at::rand({5, 2, 3}, at::device(at::kCPU).dtype(at::kFloat));
1289 const auto m2_cpu =
1290 at::rand({5, 3, 4}, at::device(at::kCPU).dtype(at::kFloat));
1291 test_bmm(m1_cpu, m2_cpu, true);
1292 }
1293
1294 TEST_F(VulkanAPITest, bmm_vulkan_small_width) {
1295 const auto m1_cpu =
1296 at::rand({9, 32, 5}, at::device(at::kCPU).dtype(at::kFloat));
1297 const auto m2_cpu =
1298 at::rand({9, 5, 13}, at::device(at::kCPU).dtype(at::kFloat));
1299 test_bmm(m1_cpu, m2_cpu, true);
1300 }
1301
1302 TEST_F(VulkanAPITest, bmm_vulkan_large_width) {
1303 const auto m1_cpu =
1304 at::rand({9, 7, 45}, at::device(at::kCPU).dtype(at::kFloat));
1305 const auto m2_cpu =
1306 at::rand({9, 45, 6}, at::device(at::kCPU).dtype(at::kFloat));
1307 test_bmm(m1_cpu, m2_cpu, true);
1308 }
1309
1310 TEST_F(VulkanAPITest, bmm_cpu) {
1311 const auto m1_cpu =
1312 at::rand({13, 23, 45}, at::device(at::kCPU).dtype(at::kFloat));
1313 const auto m2_cpu =
1314 at::rand({13, 45, 26}, at::device(at::kCPU).dtype(at::kFloat));
1315 test_bmm(m1_cpu, m2_cpu);
1316 }
1317
1318 TEST_F(VulkanAPITest, bmm_small) {
1319 const auto m1_cpu =
1320 at::rand({2, 6, 5}, at::device(at::kCPU).dtype(at::kFloat));
1321 const auto m2_cpu =
1322 at::rand({2, 5, 3}, at::device(at::kCPU).dtype(at::kFloat));
1323 test_bmm(m1_cpu, m2_cpu);
1324 }
1325
1326 TEST_F(VulkanAPITest, bmm_one) {
1327 const auto m1_cpu =
1328 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1329 const auto m2_cpu =
1330 at::rand({1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
1331 test_bmm(m1_cpu, m2_cpu);
1332 }
1333
1334 TEST_F(VulkanAPITest, bmm_error) {
1335 // mismatched dimensions of batch sizes.
1336 const auto m1_cpu =
1337 at::rand({100, 235, 546}, at::device(at::kCPU).dtype(at::kFloat));
1338 const auto m2_cpu =
1339 at::rand({200, 546, 267}, at::device(at::kCPU).dtype(at::kFloat));
1340 const auto m1_vulkan = m1_cpu.vulkan();
1341 EXPECT_THROW(m1_vulkan.bmm(m2_cpu), ::std::exception);
1342 }
1343
1344 TEST_F(VulkanAPITest, clamp) {
1345 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
1346 const auto in_vulkan = in_cpu.vulkan();
1347
1348 const float min_value = 0.2f;
1349 const float max_value = 0.8f;
1350
1351 const auto out_cpu = at::clamp(in_cpu, min_value, max_value);
1352 const auto out_vulkan = at::clamp(in_vulkan, min_value, max_value);
1353
1354 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
1355 if (!check) {
1356 showRtol(out_cpu, out_vulkan.cpu());
1357 }
1358
1359 ASSERT_TRUE(check);
1360 }
1361
1362 TEST_F(VulkanAPITest, clamp_) {
1363 const auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
1364 const auto vulkan = cpu.vulkan();
1365
1366 const float min_value = 0.2f;
1367 const float max_value = 0.8f;
1368
1369 cpu.clamp_(min_value, max_value);
1370 vulkan.clamp_(min_value, max_value);
1371
1372 const auto check = almostEqual(cpu, vulkan.cpu());
1373 if (!check) {
1374 showRtol(cpu, vulkan.cpu());
1375 }
1376
1377 ASSERT_TRUE(check);
1378 }
1379
1380 TEST_F(VulkanAPITest, conv1d_simple) {
1381 // This is a simple case using arange for input, ones for weights, and arange
1382 // for bias. This makes debugging easier.
1383 int64_t kernel_size = 3;
1384 int64_t channels = 5;
1385 int64_t lengths = 9;
1386
1387 c10::InferenceMode mode;
1388
1389 const auto input_cpu = at::arange(lengths * channels, at::kFloat).reshape({1, channels, lengths});
1390 const auto weights_cpu = at::ones({channels, 1, kernel_size}, at::device(at::kCPU).dtype(at::kFloat));
1391 const auto bias_cpu = at::arange(channels, at::kFloat);
1392
1393 const auto input_vk = input_cpu.vulkan();
1394 const auto weights_vk = weights_cpu.vulkan();
1395 const auto bias_vk = bias_cpu.vulkan();
1396
1397 int64_t stride = 1;
1398 int64_t padding = 0;
1399 int64_t dilation = 1;
1400
1401 const auto output_cpu = at::conv1d(
1402 input_cpu, weights_cpu, bias_cpu, stride, padding, dilation, channels);
1403
1404 const auto output_vk = at::conv1d(
1405 input_vk, weights_vk, bias_vk, stride, padding, dilation, channels);
1406 const auto output_vk_cpu = output_vk.cpu();
1407
1408 const bool check = almostEqual(output_cpu, output_vk_cpu);
1409 if (!check) {
1410 showRtol(output_cpu, output_vk_cpu);
1411 }
1412
1413 ASSERT_TRUE(check);
1414 }
1415
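// Helper for the conv1d tests. Channel counts are derived from the group
// sizes: in_channels = in_group_size * groups and
// out_channels = out_group_size * groups, with weights of shape
// {out_channels, in_group_size, kernel_size}. For example,
// test_conv1d(3, 5, 9, 2, 1, 2) runs a 5-group conv1d with kernel 3,
// stride 2, padding 1 and dilation 2 over a length-9 input.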
1416 void test_conv1d(
1417 int64_t kernel_size,
1418 int64_t groups,
1419 int64_t lengths,
1420 int64_t stride = 1,
1421 int64_t padding = 0,
1422 int64_t dilation = 1,
1423 int64_t in_group_size = 1,
1424 int64_t out_group_size = 1,
1425 int64_t batch_size = 1) {
1426 c10::InferenceMode mode;
1427
1428 int64_t in_channels = in_group_size * groups;
1429 int64_t out_channels = out_group_size * groups;
1430
1431 const auto input_cpu = at::rand({batch_size, in_channels, lengths}, at::kFloat);
1432 const auto weights_cpu = at::rand({out_channels, in_group_size, kernel_size}, at::kFloat);
1433 const auto bias_cpu = at::rand({out_channels,}, at::kFloat);
1434
1435 const auto input_vk = input_cpu.vulkan();
1436 const auto weights_vk = weights_cpu.vulkan();
1437 const auto bias_vk = bias_cpu.vulkan();
1438
1439 const auto output_cpu = at::conv1d(
1440 input_cpu, weights_cpu, bias_cpu, stride, padding, dilation, groups);
1441
1442 const auto output_vk = at::conv1d(
1443 input_vk, weights_vk, bias_vk, stride, padding, dilation, groups);
1444 const auto output_vk_cpu = output_vk.cpu();
1445
1446 const bool check = almostEqual(output_cpu, output_vk_cpu);
1447 if (!check) {
1448 showRtol(output_cpu, output_vk_cpu);
1449 }
1450
1451 ASSERT_TRUE(check);
1452 }
1453
1454 TEST_F(VulkanAPITest, conv1d) {
1455 test_conv1d(3, 5, 8);
1456 test_conv1d(9, 5, 9);
1457 test_conv1d(1, 12, 3);
1458 test_conv1d(1, 12, 1);
1459 test_conv1d(10, 12, 20);
1460 test_conv1d(3, 5, 9, 2, 0, 1);
1461 test_conv1d(3, 5, 9, 2, 1, 1);
1462 test_conv1d(3, 5, 9, 2, 1, 2);
1463 test_conv1d(3, 5, 9, 1, 4, 2);
1464 test_conv1d(6, 22, 30, 5, 5, 3);
1465 test_conv1d(6, 5, 30, 5, 5, 3, 3, 5);
1466 test_conv1d(6, 5, 30, 5, 5, 3, 4, 2);
1467 test_conv1d(6, 5, 30, 5, 5, 3, 4, 2, 2);
1468 test_conv1d(6, 5, 30, 5, 5, 3, 4, 2, 5);
1469 test_conv1d(6, 5, 30, 5, 5, 3, 4, 2, 9);
1470 }
1471
1472
1473
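// Exercises the prepacked conv2d path: vulkan_prepack::create_conv2d_context
// packs the weight, bias and conv parameters (the two trailing std::nullopt
// arguments presumably leave the optional output clamp bounds unset), and
// run_conv2d_context applies the packed context to a Vulkan input.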
1474 void test_conv2d_context(
1475 const at::IntArrayRef input_shape,
1476 const at::IntArrayRef weight_shape,
1477 const at::IntArrayRef bias_shape,
1478 std::vector<int64_t> stride,
1479 std::vector<int64_t> padding,
1480 std::vector<int64_t> dilation,
1481 int64_t groups) {
1482 c10::InferenceMode mode;
1483
1484 at::Tensor input = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
1485 at::Tensor weight = at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
1486 at::Tensor bias = at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
1487
1488 // cpu
1489 const auto out_cpu = at::conv2d(
1490 input, weight, bias, stride, padding, dilation, groups);
1491
1492 // vulkan
1493 const auto prepack_vulkan = callOpByName(
1494 "vulkan_prepack::create_conv2d_context",
1495 "",
1496 weight, bias, stride, padding, dilation, groups, std::nullopt, std::nullopt);
1497
1498 const auto vulkan_output = callOpByName(
1499 "vulkan_prepack::run_conv2d_context",
1500 "",
1501 input.vulkan(), prepack_vulkan[0]);
1502
1503 const auto out_vulkan = vulkan_output[0].toTensor();
1504 const auto out_vk_cpu = out_vulkan.cpu();
1505
1506 // check
1507 const bool check = almostEqual(out_cpu, out_vk_cpu);
1508 if (!check) {
1509 showRtol(out_cpu, out_vk_cpu);
1510 }
1511
1512 ASSERT_TRUE(check);
1513 }
1514
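// Same comparison as test_conv2d_context, but routed through the older
// vulkan_prepack::conv2d_clamp_prepack / conv2d_clamp_run op names to verify
// that the backwards-compatible entry points still produce correct results.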
1515 void test_backwards_compatible_conv2d_context(
1516 const at::IntArrayRef input_shape,
1517 const at::IntArrayRef weight_shape,
1518 const at::IntArrayRef bias_shape,
1519 std::vector<int64_t> stride,
1520 std::vector<int64_t> padding,
1521 std::vector<int64_t> dilation,
1522 int64_t groups) {
1523 c10::InferenceMode mode;
1524
1525 at::Tensor input = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
1526 at::Tensor weight = at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
1527 at::Tensor bias = at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
1528
1529 // cpu
1530 const auto out_cpu = at::conv2d(
1531 input, weight, bias, stride, padding, dilation, groups);
1532
1533 // vulkan
1534 const auto prepack_vulkan = callOpByName(
1535 "vulkan_prepack::conv2d_clamp_prepack",
1536 "",
1537 weight, bias, stride, padding, dilation, groups, std::nullopt, std::nullopt);
1538
1539 const auto vulkan_output = callOpByName(
1540 "vulkan_prepack::conv2d_clamp_run",
1541 "",
1542 input.vulkan(), prepack_vulkan[0]);
1543
1544 const auto out_vulkan = vulkan_output[0].toTensor();
1545 const auto out_vk_cpu = out_vulkan.cpu();
1546
1547 // check
1548 const bool check = almostEqual(out_cpu, out_vk_cpu);
1549 if (!check) {
1550 showRtol(out_cpu, out_vk_cpu);
1551 }
1552
1553 ASSERT_TRUE(check);
1554 }
1555
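// Prepacked transposed convolution: packs via
// vulkan_prepack::create_tconv2d_context, runs via
// vulkan_prepack::run_tconv2d_context, and compares against
// at::conv_transpose2d on CPU.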
1556 void test_transposed_conv2d_context(
1557 const at::IntArrayRef input_shape,
1558 const at::IntArrayRef weight_shape,
1559 const at::IntArrayRef bias_shape,
1560 std::vector<int64_t> stride,
1561 std::vector<int64_t> padding,
1562 std::vector<int64_t> output_padding,
1563 std::vector<int64_t> dilation,
1564 int64_t groups) {
1565 c10::InferenceMode mode;
1566
1567 at::Tensor input = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
1568 at::Tensor weight = at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
1569 at::Tensor bias = at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
1570
1571 // cpu
1572 const auto out_cpu = at::conv_transpose2d(
1573 input, weight, bias, stride, padding, output_padding, groups, dilation);
1574
1575 // vulkan
1576 const auto prepack_vulkan = callOpByName(
1577 "vulkan_prepack::create_tconv2d_context",
1578 "",
1579 weight, bias, stride, padding, output_padding, dilation, groups, std::nullopt, std::nullopt);
1580
1581 const auto vulkan_output = callOpByName(
1582 "vulkan_prepack::run_tconv2d_context",
1583 "",
1584 input.vulkan(), prepack_vulkan[0]);
1585
1586 const auto out_vulkan = vulkan_output[0].toTensor();
1587 const auto out_vk_cpu = out_vulkan.cpu();
1588
1589 // check
1590 const bool check = almostEqual(out_cpu, out_vk_cpu);
1591 if (!check) {
1592 showRtol(out_cpu, out_vk_cpu);
1593 }
1594
1595 ASSERT_TRUE(check);
1596 }
1597
1598 TEST_F(VulkanAPITest, conv2d) {
1599 constexpr int64_t groups = 1;
1600 constexpr std::array<int64_t, 2u> stride{2, 2};
1601 constexpr std::array<int64_t, 2u> padding{1, 1};
1602 //TODO: Support conv2d with dilation != 1
1603 constexpr std::array<int64_t, 2u> dilation{1, 1};
1604
1605 constexpr struct {
1606 uint32_t batches;
1607 uint32_t channels;
1608 uint32_t width;
1609 uint32_t height;
1610
1611 std::array<int64_t, 4u> size() const {
1612 return {
1613 batches,
1614 channels,
1615 width,
1616 height,
1617 };
1618 }
1619 } input {1, 3, 8, 8};
1620
1621 constexpr struct {
1622 uint32_t output_channels;
1623 uint32_t input_channels;
1624 uint32_t width;
1625 uint32_t height;
1626
1627 std::array<int64_t, 4u> size() const {
1628 return {
1629 output_channels,
1630 input_channels,
1631 width,
1632 height,
1633 };
1634 }
1635 } weights {1, input.channels, 3, 3};
1636
1637 const auto input_cpu = at::randn(input.size(), at::device(at::kCPU).dtype(at::kFloat));
1638 const auto weights_cpu = at::randn(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
1639 const auto bias_cpu = at::randn({weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
1640
1641 const auto output_cpu = at::conv2d(
1642 input_cpu,
1643 weights_cpu,
1644 bias_cpu,
1645 stride,
1646 padding,
1647 dilation,
1648 groups);
1649
1650 const auto output_vulkan = at::conv2d(
1651 input_cpu.vulkan(),
1652 weights_cpu,
1653 bias_cpu,
1654 stride,
1655 padding,
1656 dilation,
1657 groups).cpu();
1658
1659 const bool check = almostEqual(output_cpu, output_vulkan);
1660 if (!check) {
1661 showRtol(output_cpu, output_vulkan);
1662 }
1663
1664 ASSERT_TRUE(check);
1665 }
1666
1667 TEST_F(VulkanAPITest, conv2d_prepack) {
1668 test_conv2d_context(
1669 {1, 3, 8, 8}, // input_shape
1670 {1, 3, 3, 3}, // weight_shape
1671 {1}, // bias_shape
1672 {2, 2}, // stride
1673 {1, 1}, // padding
1674 {1, 1}, // dilation
1675 1); // groups
1676 }
1677
1678 TEST_F(VulkanAPITest, conv2d_prepack_bc) {
1679 test_backwards_compatible_conv2d_context(
1680 {1, 3, 8, 8}, // input_shape
1681 {1, 3, 3, 3}, // weight_shape
1682 {1}, // bias_shape
1683 {2, 2}, // stride
1684 {1, 1}, // padding
1685 {1, 1}, // dilation
1686 1); // groups
1687 }
1688
1689 TEST_F(VulkanAPITest, conv2d_dw_3x3) {
1690 constexpr int64_t groups = 7;
1691 constexpr std::array<int64_t, 2u> stride{2, 3};
1692 constexpr std::array<int64_t, 2u> padding{0, 4};
1693 constexpr std::array<int64_t, 2u> dilation{3, 1};
1694
1695 constexpr struct {
1696 uint32_t batches;
1697 uint32_t channels;
1698 uint32_t width;
1699 uint32_t height;
1700
1701 std::array<int64_t, 4u> size() const {
1702 return {
1703 batches,
1704 channels,
1705 width,
1706 height,
1707 };
1708 }
1709 } input{1, groups, 137, 199};
1710
1711 constexpr struct {
1712 uint32_t output_channels;
1713 uint32_t input_channels;
1714 uint32_t width;
1715 uint32_t height;
1716
1717 std::array<int64_t, 4u> size() const {
1718 return {
1719 output_channels,
1720 input_channels,
1721 width,
1722 height,
1723 };
1724 }
1725 } weights{groups, 1, 3, 3};
1726
1727 const auto input_cpu =
1728 at::rand(input.size(), at::device(at::kCPU).dtype(at::kFloat));
1729 const auto weights_cpu =
1730 at::rand(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
1731 const auto bias_cpu = at::rand(
1732 {weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
1733
1734 const auto output_cpu = at::conv2d(
1735 input_cpu, weights_cpu, bias_cpu, stride, padding, dilation, groups);
1736
1737 const auto output_vulkan = at::conv2d(
1738 input_cpu.vulkan(),
1739 weights_cpu,
1740 bias_cpu,
1741 stride,
1742 padding,
1743 dilation,
1744 groups);
1745
1746 const bool check = almostEqual(output_cpu, output_vulkan.cpu());
1747 if (!check) {
1748 showRtol(output_cpu, output_vulkan.cpu());
1749 }
1750
1751 ASSERT_TRUE(check);
1752 }
1753
1754 TEST_F(VulkanAPITest, conv2d_dw_5x5) {
1755 constexpr int64_t groups = 7;
1756 constexpr std::array<int64_t, 2u> stride{2, 3};
1757 constexpr std::array<int64_t, 2u> padding{0, 4};
1758 constexpr std::array<int64_t, 2u> dilation{3, 1};
1759
1760 constexpr struct {
1761 uint32_t batches;
1762 uint32_t channels;
1763 uint32_t width;
1764 uint32_t height;
1765
1766 std::array<int64_t, 4u> size() const {
1767 return {
1768 batches,
1769 channels,
1770 width,
1771 height,
1772 };
1773 }
1774 } input{1, groups, 137, 199};
1775
1776 constexpr struct {
1777 uint32_t output_channels;
1778 uint32_t input_channels;
1779 uint32_t width;
1780 uint32_t height;
1781
1782 std::array<int64_t, 4u> size() const {
1783 return {
1784 output_channels,
1785 input_channels,
1786 width,
1787 height,
1788 };
1789 }
1790 } weights{groups, 1, 5, 5};
1791
1792 const auto input_cpu =
1793 at::rand(input.size(), at::device(at::kCPU).dtype(at::kFloat));
1794 const auto weights_cpu =
1795 at::rand(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
1796 const auto bias_cpu = at::rand(
1797 {weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
1798
1799 const auto output_cpu = at::conv2d(
1800 input_cpu, weights_cpu, bias_cpu, stride, padding, dilation, groups);
1801
1802 const auto output_vulkan = at::conv2d(
1803 input_cpu.vulkan(),
1804 weights_cpu,
1805 bias_cpu,
1806 stride,
1807 padding,
1808 dilation,
1809 groups);
1810
1811 const bool check = almostEqual(output_cpu, output_vulkan.cpu());
1812 if (!check) {
1813 showRtol(output_cpu, output_vulkan.cpu());
1814 }
1815
1816 ASSERT_TRUE(check);
1817 }
1818
1819 TEST_F(VulkanAPITest, conv2d_dw) {
1820 constexpr int64_t groups = 7;
1821 constexpr std::array<int64_t, 2u> stride{2, 3};
1822 constexpr std::array<int64_t, 2u> padding{0, 4};
1823 constexpr std::array<int64_t, 2u> dilation{3, 1};
1824
1825 constexpr struct {
1826 uint32_t batches;
1827 uint32_t channels;
1828 uint32_t width;
1829 uint32_t height;
1830
1831 std::array<int64_t, 4u> size() const {
1832 return {
1833 batches,
1834 channels,
1835 width,
1836 height,
1837 };
1838 }
1839 } input {1, groups, 137, 199};
1840
1841 constexpr struct {
1842 uint32_t output_channels;
1843 uint32_t input_channels;
1844 uint32_t width;
1845 uint32_t height;
1846
1847 std::array<int64_t, 4u> size() const {
1848 return {
1849 output_channels,
1850 input_channels,
1851 width,
1852 height,
1853 };
1854 }
1855 } weights {groups, 1, 17, 7};
1856
1857 const auto input_cpu = at::rand(input.size(), at::device(at::kCPU).dtype(at::kFloat));
1858 const auto weights_cpu = at::rand(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
1859 const auto bias_cpu = at::rand({weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
1860
1861 const auto output_cpu = at::conv2d(
1862 input_cpu,
1863 weights_cpu,
1864 bias_cpu,
1865 stride,
1866 padding,
1867 dilation,
1868 groups);
1869
1870 const auto output_vulkan = at::conv2d(
1871 input_cpu.vulkan(),
1872 weights_cpu,
1873 bias_cpu,
1874 stride,
1875 padding,
1876 dilation,
1877 groups);
1878
1879 const bool check = almostEqual(output_cpu, output_vulkan.cpu());
1880 if (!check) {
1881 showRtol(output_cpu, output_vulkan.cpu());
1882 }
1883
1884 ASSERT_TRUE(check);
1885 }
1886
1887 TEST_F(VulkanAPITest, conv2d_dw_prepack) {
1888 test_conv2d_context(
1889 {1, 7, 137, 199}, // input_shape
1890 {7, 1, 17, 7}, // weight_shape
1891 {7}, // bias_shape
1892 {2, 3}, // stride
1893 {0, 4}, // padding
1894 {3, 1}, // dilation
1895 7); // groups
1896 }
1897
1898 TEST_F(VulkanAPITest, conv2d_dw_prepack_bc) {
1899 test_backwards_compatible_conv2d_context(
1900 {1, 7, 137, 199}, // input_shape
1901 {7, 1, 17, 7}, // weight_shape
1902 {7}, // bias_shape
1903 {2, 3}, // stride
1904 {0, 4}, // padding
1905 {3, 1}, // dilation
1906 7); // groups
1907 }
1908
1909 TEST_F(VulkanAPITest, conv2d_pw) {
1910 constexpr int64_t groups = 1;
1911 constexpr std::array<int64_t, 2u> stride{1, 1};
1912 constexpr std::array<int64_t, 2u> padding{0, 0};
1913 constexpr std::array<int64_t, 2u> dilation{1, 1};
1914
1915 constexpr struct {
1916 uint32_t batches;
1917 uint32_t channels;
1918 uint32_t width;
1919 uint32_t height;
1920
1921 std::array<int64_t, 4u> size() const {
1922 return {
1923 batches,
1924 channels,
1925 width,
1926 height,
1927 };
1928 }
1929 } input {1, 17, 127, 397};
1930
1931 constexpr struct {
1932 uint32_t output_channels;
1933 uint32_t input_channels;
1934 uint32_t width;
1935 uint32_t height;
1936
1937 std::array<int64_t, 4u> size() const {
1938 return {
1939 output_channels,
1940 input_channels,
1941 width,
1942 height,
1943 };
1944 }
1945 } weights {29, input.channels, 1, 1};
1946
1947 const auto input_cpu = at::randn(input.size(), at::device(at::kCPU).dtype(at::kFloat));
1948 const auto weights_cpu = at::randn(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
1949 const auto bias_cpu = at::randn({weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
1950
1951 const auto output_cpu = at::conv2d(
1952 input_cpu,
1953 weights_cpu,
1954 bias_cpu,
1955 stride,
1956 padding,
1957 dilation,
1958 groups);
1959
1960 const auto output_vulkan = at::conv2d(
1961 input_cpu.vulkan(),
1962 weights_cpu,
1963 bias_cpu,
1964 stride,
1965 padding,
1966 dilation,
1967 groups);
1968
1969 const bool check = almostEqual(output_cpu, output_vulkan.cpu());
1970 if (!check) {
1971 showRtol(output_cpu, output_vulkan.cpu());
1972 }
1973
1974 ASSERT_TRUE(check);
1975 }
1976
1977 TEST_F(VulkanAPITest, conv2d_pw_prepack_medium) {
1978 int in_channels = 17;
1979 int out_channels = 29;
1980 int height = 27;
1981 int width = 39;
1982 test_conv2d_context(
1983 {1, in_channels, height, width}, // input_shape
1984 {out_channels, in_channels, 1, 1}, // weight_shape
1985 {out_channels}, // bias_shape
1986 {1, 1}, // stride
1987 {0, 0}, // padding
1988 {1, 1}, // dilation
1989 1); // groups
1990 }
1991
1992 TEST_F(VulkanAPITest, conv2d_pw_prepack_bc_medium) {
1993 int in_channels = 17;
1994 int out_channels = 29;
1995 int height = 27;
1996 int width = 39;
1997 test_backwards_compatible_conv2d_context(
1998 {1, in_channels, height, width}, // input_shape
1999 {out_channels, in_channels, 1, 1}, // weight_shape
2000 {out_channels}, // bias_shape
2001 {1, 1}, // stride
2002 {0, 0}, // padding
2003 {1, 1}, // dilation
2004 1); // groups
2005 }
2006
2007 // The following 2 tests failed on Meta's CI when all tests are executed: the
2008 // output contains lots of NaNs. The cause is unknown.
2009 // When each test is run alone (with gtest_filter), it passes.
2010 // The tests also pass with smaller planes, see "conv2d_pw_prepack_medium".
2011 TEST_F(VulkanAPITest, DISABLED_conv2d_pw_prepack) {
2012 test_conv2d_context(
2013 {1, 17, 127, 397}, // input_shape
2014 {29, 17, 1, 1}, // weight_shape
2015 {29}, // bias_shape
2016 {1, 1}, // stride
2017 {0, 0}, // padding
2018 {1, 1}, // dilation
2019 1); // groups
2020 }
2021
2022 TEST_F(VulkanAPITest, DISABLED_conv2d_pw_prepack_bc) {
2023 test_backwards_compatible_conv2d_context(
2024 {1, 17, 127, 397}, // input_shape
2025 {29, 17, 1, 1}, // weight_shape
2026 {29}, // bias_shape
2027 {1, 1}, // stride
2028 {0, 0}, // padding
2029 {1, 1}, // dilation
2030 1); // groups
2031 }
2032
2033 TEST_F(VulkanAPITest, conv2d_transposed) {
2034 // Arrange
2035 constexpr int64_t groups = 1;
2036 constexpr std::array<int64_t, 2u> stride{1, 2};
2037 constexpr std::array<int64_t, 2u> padding{1, 0};
2038 constexpr std::array<int64_t, 2u> output_padding{0, 1};
2039 //TODO: Support conv_transpose2d with dilation != 1
2040 constexpr std::array<int64_t, 2u> dilation{1, 1};
2041
2042 constexpr struct {
2043 uint32_t batches;
2044 uint32_t channels;
2045 uint32_t height;
2046 uint32_t width;
2047
2048 std::array<int64_t, 4u> size() const {
2049 return {
2050 batches,
2051 channels,
2052 height,
2053 width,
2054 };
2055 }
2056 } input {1, 55, 7, 19};
2057
2058 constexpr struct {
2059 uint32_t input_channels;
2060 uint32_t output_channels;
2061 uint32_t height;
2062 uint32_t width;
2063
2064 std::array<int64_t, 4u> size() const {
2065 return {
2066 input_channels,
2067 output_channels,
2068 height,
2069 width,
2070 };
2071 }
2072 } weights {input.channels, 47, 2, 3};
2073
2074 const auto input_cpu = at::randn(input.size(), at::device(at::kCPU).dtype(at::kFloat));
2075 const auto weights_cpu = at::randn(weights.size(), at::device(at::kCPU).dtype(at::kFloat));
2076 const auto bias_cpu = at::zeros({weights.output_channels}, at::device(at::kCPU).dtype(at::kFloat));
2077
2078 // Act
2079 const auto output_cpu = at::conv_transpose2d(
2080 input_cpu,
2081 weights_cpu,
2082 bias_cpu,
2083 stride,
2084 padding,
2085 output_padding,
2086 groups,
2087 dilation);
2088
2089 const auto output_vk = at::conv_transpose2d(
2090 input_cpu.vulkan(),
2091 weights_cpu,
2092 bias_cpu,
2093 stride,
2094 padding,
2095 output_padding,
2096 groups,
2097 dilation).cpu();
2098
2099 // Assert
2100 const bool check = almostEqual(output_cpu, output_vk);
2101 if (!check) {
2102 showRtol(output_cpu, output_vk);
2103 }
2104
2105 ASSERT_TRUE(check);
2106 }
2107
2108 TEST_F(VulkanAPITest, conv2d_transposed_prepack) {
2109 test_transposed_conv2d_context(
2110 {1, 55, 7, 19}, // input_shape
2111 {55, 47, 2, 3}, // weight_shape
2112 {47}, // bias_shape
2113 {1, 2}, // stride
2114 {1, 0}, // padding
2115 {0, 1}, // output_padding
2116 {1, 1}, // dilation
2117 1); // groups
2118 }
2119
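// Feeds the result of a Vulkan division into a prepacked, clamped convolution
// and compares it against the equivalent prepacked CPU path. The trailing
// 0.0f / std::nullopt arguments are the output clamp bounds passed to the
// prepack ops (a lower bound of 0 and no upper bound).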
2120 TEST_F(VulkanAPITest, conv2d_clamp_after_div) {
2121 c10::InferenceMode mode;
2122
2123 constexpr std::array<int64_t, 2u> stride{2, 2};
2124 constexpr std::array<int64_t, 2u> padding{1, 1};
2125 constexpr std::array<int64_t, 2u> dilation{1, 1};
2126 constexpr int64_t groups = 1;
2127
2128 const auto input_numerator = at::rand({1, 3, 64, 64}, at::device(at::kCPU).dtype(at::kFloat));
2129 const auto input_denominator = at::rand({3, 1, 1}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2130 const auto input_cpu = at::div(input_numerator, input_denominator);
2131 const auto input_vk = at::div(input_numerator.vulkan(), input_denominator.vulkan());
2132 at::Tensor weight = at::rand({24, 3, 3, 3}, at::device(at::kCPU).dtype(at::kFloat));
2133 at::Tensor bias = at::rand({24}, at::device(at::kCPU).dtype(at::kFloat));
2134
2135 // cpu
2136 const auto prepack_cpu = callOpByName(
2137 "prepacked::conv2d_clamp_prepack",
2138 "",
2139 weight, bias, stride, padding, dilation, groups, 0.0f, std::nullopt)[0];
2140
2141 const auto out_cpu = callOpByName(
2142 "prepacked::conv2d_clamp_run",
2143 "",
2144 input_cpu, prepack_cpu)[0].toTensor();
2145
2146 // vulkan
2147 const auto prepack_vk = callOpByName(
2148 "vulkan_prepack::create_conv2d_context",
2149 "",
2150 weight, bias, stride, padding, dilation, groups, 0.0f, std::nullopt)[0];
2151
2152 const auto out_vk = callOpByName(
2153 "vulkan_prepack::run_conv2d_context",
2154 "",
2155 input_vk, prepack_vk)[0].toTensor();
2156
2157 const auto out_vk_cpu = out_vk.cpu();
2158
2159 // check
2160 const bool check = almostEqual(out_cpu, out_vk_cpu);
2161 if (!check) {
2162 showRtol(out_cpu, out_vk_cpu);
2163 }
2164
2165 ASSERT_TRUE(check);
2166 }
2167
2168 TEST_F(VulkanAPITest, copy) {
2169 const auto cpu = at::rand({13, 17, 37, 19}, at::device(at::kCPU).dtype(at::kFloat));
2170 const auto vulkan = cpu.vulkan();
2171
2172 const auto check = almostEqual(cpu, vulkan.cpu());
2173 if (!check) {
2174 showRtol(cpu, vulkan.cpu());
2175 }
2176
2177 ASSERT_TRUE(check);
2178 }
2179
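// Compares at::cumsum along the given dimension between CPU and Vulkan for a
// random tensor of the given shape.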
2180 void test_cumsum(const at::IntArrayRef input_shape, const int64_t dim) {
2181 const auto in_cpu = at::rand(input_shape, at::TensorOptions(at::kCPU).dtype(at::kFloat));
2182
2183 const auto out_cpu = at::cumsum(in_cpu, dim);
2184 const auto out_vulkan = at::cumsum(in_cpu.vulkan(), dim);
2185 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2186 if (!check) {
2187 showRtol(out_cpu, out_vulkan.cpu());
2188 }
2189 ASSERT_TRUE(check);
2190 }
2191
2192 TEST_F(VulkanAPITest, cumsum_1d) {
2193 test_cumsum({37}, 0);
2194 test_cumsum({37}, -1);
2195 }
2196
2197 TEST_F(VulkanAPITest, cumsum_2d) {
2198 for (int64_t i = -1; i <= 1; i++) {
2199 test_cumsum({17, 37}, i);
2200 }
2201 }
2202
2203 TEST_F(VulkanAPITest, cumsum_3d) {
2204 for (int64_t i = -2; i <= 2; i++) {
2205 test_cumsum({17, 37, 49}, i);
2206 }
2207 }
2208
2209 TEST_F(VulkanAPITest, cumsum_4d) {
2210 for (int64_t i = -3; i <= 3; i++) {
2211 test_cumsum({12, 17, 37, 49}, i);
2212 }
2213 }
2214
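// Element-wise division with broadcasting; the divisor is offset by 0.01 so
// the reference values stay away from division by values near zero.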
2215 void test_div(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape) {
2216 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2217 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2218
2219 const auto in_vulkan = in_cpu.vulkan();
2220 const auto other_vulkan = other_cpu.vulkan();
2221
2222 const auto out_cpu = at::div(in_cpu, other_cpu);
2223 const auto out_vulkan = at::div(in_vulkan, other_vulkan);
2224
2225 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2226 if (!check) {
2227 showRtol(out_cpu, out_vulkan.cpu());
2228 }
2229
2230 ASSERT_TRUE(check);
2231 }
2232
2233 TEST_F(VulkanAPITest, div) {
2234 test_div({11, 7, 139, 109}, {11, 7, 139, 109});
2235 }
2236
2237 TEST_F(VulkanAPITest, div_broadcast0) {
2238 test_div({3, 5, 1, 1}, {3, 5, 179, 221});
2239 }
2240
2241 TEST_F(VulkanAPITest, div_broadcast1) {
2242 test_div({3, 5, 179, 221}, {3, 5, 1, 221});
2243 }
2244
2245 TEST_F(VulkanAPITest, div_broadcast2) {
2246 test_div({3, 4, 179, 221}, {4, 1, 1});
2247 }
2248
2249 TEST_F(VulkanAPITest, div_broadcast3) {
2250 test_div({3, 4, 179, 221}, {1, 1, 179, 221});
2251 }
2252
2253 TEST_F(VulkanAPITest, div_broadcast4) {
2254 test_div({3, 4, 41, 1}, {1, 41, 53});
2255 }
2256
2257 TEST_F(VulkanAPITest, div_broadcast5) {
2258 test_div({2, 1, 7, 1}, {1, 5, 1, 4});
2259 }
2260
2261 TEST_F(VulkanAPITest, div_broadcast6) {
2262 test_div({1, 15, 5, 4}, {21, 1, 5, 4});
2263 }
2264
2265 TEST_F(VulkanAPITest, div_zero_dim) {
2266 test_div({1, 15, 5, 4}, {});
2267 }
2268
2269 TEST_F(VulkanAPITest, div_) {
2270 auto a_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
2271 auto a_vulkan = a_cpu.vulkan();
2272
2273 const auto b_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2274 const auto b_vulkan = b_cpu.vulkan();
2275
2276 a_cpu.div_(b_cpu);
2277 a_vulkan.div_(b_vulkan);
2278
2279 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2280 if (!check) {
2281 showRtol(a_cpu, a_vulkan.cpu());
2282 }
2283
2284 ASSERT_TRUE(check);
2285 }
2286
2287 TEST_F(VulkanAPITest, div_broadcast0_) {
2288 auto a_cpu = at::rand({12, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
2289 auto a_vulkan = a_cpu.vulkan();
2290
2291 const auto b_cpu = at::rand({12, 17, 29, 1}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2292 const auto b_vulkan = b_cpu.vulkan();
2293
2294 a_cpu.div_(b_cpu);
2295 a_vulkan.div_(b_vulkan);
2296
2297 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2298 if (!check) {
2299 showRtol(a_cpu, a_vulkan.cpu());
2300 }
2301
2302 ASSERT_TRUE(check);
2303 }
2304
2305 TEST_F(VulkanAPITest, div_broadcast1_) {
2306 auto a_cpu = at::rand({3, 8, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
2307 auto a_vulkan = a_cpu.vulkan();
2308
2309 const auto b_cpu = at::rand({8, 1, 1}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2310 const auto b_vulkan = b_cpu.vulkan();
2311
2312 a_cpu.div_(b_cpu);
2313 a_vulkan.div_(b_vulkan);
2314
2315 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2316 if (!check) {
2317 showRtol(a_cpu, a_vulkan.cpu());
2318 }
2319
2320 ASSERT_TRUE(check);
2321 }
2322
2323 TEST_F(VulkanAPITest, div_scalar) {
2324
2325 const auto a_cpu = at::rand({17, 213, 213, 7}, at::device(at::kCPU).dtype(at::kFloat));
2326 const auto a_vulkan = a_cpu.vulkan();
2327
2328 const float b_scalar = 3.1415f;
2329
2330 const auto c_cpu = at::div(a_cpu, b_scalar);
2331 const auto c_vulkan = at::div(a_vulkan, b_scalar);
2332
2333 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
2334 if (!check) {
2335 showRtol(c_cpu, c_vulkan.cpu());
2336 }
2337
2338 ASSERT_TRUE(check);
2339 }
2340
2341 TEST_F(VulkanAPITest, div_scalar_) {
2342 auto a_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
2343 auto a_vulkan = a_cpu.vulkan();
2344
2345 const float b_scalar = 3.1415f;
2346
2347 a_cpu.div_(b_scalar);
2348 a_vulkan.div_(b_scalar);
2349
2350 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2351 if (!check) {
2352 showRtol(a_cpu, a_vulkan.cpu());
2353 }
2354
2355 ASSERT_TRUE(check);
2356 }
2357
2358 TEST_F(VulkanAPITest, div_scalar_wrapped) {
2359 if (!at::is_vulkan_available()) {
2360 return;
2361 }
2362
2363 const auto a_cpu = at::rand({17, 213, 213, 7}, at::device(at::kCPU).dtype(at::kFloat));
2364 const auto a_vulkan = a_cpu.vulkan();
2365
2366 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2367
2368 const auto c_cpu = at::div(a_cpu, b_scalar);
2369 const auto c_vulkan = at::div(a_vulkan, b_scalar);
2370
2371 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
2372 if (!check) {
2373 showRtol(c_cpu, c_vulkan.cpu());
2374 }
2375
2376 ASSERT_TRUE(check);
2377 }
2378
2379 TEST_F(VulkanAPITest, div_scalar_wrapped_) {
2380 if (!at::is_vulkan_available()) {
2381 return;
2382 }
2383
2384 auto a_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
2385 auto a_vulkan = a_cpu.vulkan();
2386
2387 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2388
2389 a_cpu.div_(b_scalar);
2390 a_vulkan.div_(b_scalar);
2391
2392 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2393 if (!check) {
2394 showRtol(a_cpu, a_vulkan.cpu());
2395 }
2396
2397 ASSERT_TRUE(check);
2398 }
2399
2400 TEST_F(VulkanAPITest, div_to_scalar_wrapped) {
2401 if (!at::is_vulkan_available()) {
2402 return;
2403 }
2404
2405 const auto a = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
2406
2407 const auto b_cpu = at::rand({2, 3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)) + 0.01;
2408 const auto b_vulkan = b_cpu.vulkan();
2409
2410 const auto c_cpu = at::div(a, b_cpu);
2411 const auto c_vulkan = at::div(a, b_vulkan);
2412
2413 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
2414 if (!check) {
2415 showRtol(c_cpu, c_vulkan.cpu());
2416 }
2417
2418 ASSERT_TRUE(check);
2419 }
2420
2421 TEST_F(VulkanAPITest, empty) {
2422
2423 ASSERT_NO_THROW(at::empty({1, 17, 41, 53}, at::device(at::kVulkan).dtype(at::kFloat)));
2424 }
2425
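// Expands a random tensor of input_shape to output_shape on both CPU and
// Vulkan and compares the expanded results.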
2426 void test_expand(const at::IntArrayRef input_shape, const at::IntArrayRef output_shape) {
2427 c10::InferenceMode mode;
2428 const auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2429 const auto vulkan = cpu.vulkan();
2430
2431 const auto out_cpu = cpu.expand(output_shape);
2432 const auto out_vulkan = vulkan.expand(output_shape);
2433 
2434 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2435 if (!check) {
2436 showRtol(out_cpu, out_vulkan.cpu());
2437 }
2438 ASSERT_TRUE(check);
2439 }
2440
2441 TEST_F(VulkanAPITest, expand_exceptions) {
2442 // Vulkan expand supports input dims <= 4
2443 auto in_cpu = at::rand({1, 2, 3, 4, 5}, at::device(at::kCPU).dtype(at::kFloat));
2444 EXPECT_THROW(const auto out_vulkan = in_cpu.vulkan().expand({1, 2, 3, 4}), ::std::exception);
2445
2446 // Vulkan expand supports output_size <= 4
2447 in_cpu = at::rand({1, 2, 3, 4}, at::device(at::kCPU).dtype(at::kFloat));
2448 EXPECT_THROW(const auto out_vulkan = in_cpu.vulkan().expand({1, 1, 2, 3, 4}), ::std::exception);
2449
2450 // Vulkan expand expects output size >= input
2451 in_cpu = at::rand({1, 2, 3}, at::device(at::kCPU).dtype(at::kFloat));
2452 EXPECT_THROW(const auto out_vulkan = in_cpu.vulkan().expand({2, 3}), ::std::exception);
2453
2454 // Non-singleton dimensions must match
2455 in_cpu = at::rand({3, 1}, at::device(at::kCPU).dtype(at::kFloat));
2456 EXPECT_THROW(const auto out_vulkan = in_cpu.vulkan().expand({1, 1}), ::std::exception);
2457
2458 // -1 not allowed in leading, non-existing dimension
2459 in_cpu = at::rand({3, 1}, at::device(at::kCPU).dtype(at::kFloat));
2460 EXPECT_THROW(const auto out_vulkan = in_cpu.vulkan().expand({-1, 3, 1}), ::std::exception);
2461 }
2462
2463 TEST_F(VulkanAPITest, expand_1d) {
2464 test_expand({1}, {3});
2465
2466 test_expand({1}, {9, 3}); // 1d->2d
2467 test_expand({1}, {8, 9, 3}); // 1d->3d
2468 test_expand({1}, {7, 8, 9, 3}); // 1d->4d
2469 }
2470
2471 TEST_F(VulkanAPITest, expand_2d) {
2472 test_expand({5, 1}, {-1, 5}); // W
2473 test_expand({1, 5}, {5, 5}); // H
2474
2475 test_expand({5, 1}, {2, -1, 5}); // 2d->3d
2476 test_expand({1, 5}, {2, 5, 3, -1}); // 2d->4d
2477 }
2478
2479 TEST_F(VulkanAPITest, expand_3d) {
2480 test_expand({3, 4, 1}, {3, 4, -1}); // W
2481 test_expand({3, 1, 5}, {-1, 4, 5}); // H
2482 test_expand({1, 4, 5}, {3, -1, 5}); // C
2483
2484 test_expand({5, 4, 3}, {2, -1, -1, -1}); // 3d->4d
2485 }
2486
2487 TEST_F(VulkanAPITest, expand_4d) {
2488 test_expand({5, 4, 3, 1}, {5, 4, 3, 9}); // W
2489 test_expand({5, 4, 1, 2}, {5, 4, 9, 2}); // H
2490 test_expand({5, 1, 3, 2}, {5, 9, 3, 2}); // C
2491 test_expand({1, 4, 3, 2}, {9, 4, 3, 2}); // N
2492 }
2493
2494 TEST_F(VulkanAPITest, expand_as) {
2495 // expand_as calls into expand (without negative sizes), so the expand tests above should be sufficient.
2496 c10::InferenceMode mode;
2497 const auto cpu = at::rand({1, 1, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
2498 const auto vulkan = cpu.vulkan();
2499 const auto other = at::rand({9, 11, 33, 22}, at::device(at::kCPU).dtype(at::kFloat));
2500
2501 const auto out_cpu = cpu.expand_as(other);
2502 const auto out_vulkan = vulkan.expand_as(other);
2503 
2504 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2505 if (!check) {
2506 showRtol(out_cpu, out_vulkan.cpu());
2507 }
2508 ASSERT_TRUE(check);
2509 }
2510
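// Flips a random tensor along the given list of dimensions on both CPU and
// Vulkan and compares the results; the shape and dim_list are printed when a
// mismatch is detected.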
2511 void test_flip(const at::IntArrayRef input_shape, const at::IntArrayRef dim_list) {
2512 c10::InferenceMode mode;
2513 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2514 const auto in_vulkan = in_cpu.vulkan();
2515
2516 const auto out_cpu = at::flip(in_cpu, dim_list);
2517 const auto out_vulkan = at::flip(in_vulkan, dim_list);
2518
2519 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2520 if (!check) {
2521 showRtol(out_cpu, out_vulkan.cpu());
2522 std::cout << "test flip failed with input_shape: " << input_shape
2523 << " and dim_list: " << dim_list << std::endl;
2524 }
2525
2526 ASSERT_TRUE(check);
2527 }
2528
2529 TEST_F(VulkanAPITest, flip_1d) {
2530 test_flip({5}, {0});
2531 test_flip({5}, {-1});
2532 }
2533
2534 TEST_F(VulkanAPITest, flip_2d) {
2535 test_flip({5, 5}, {-1});
2536 test_flip({2, 7}, {-2});
2537
2538 test_flip({5, 5}, {0, 1});
2539 }
2540
2541 TEST_F(VulkanAPITest, flip_3d) {
2542 test_flip({5, 7, 5}, {-1});
2543 test_flip({2, 9, 7}, {-2});
2544 test_flip({9, 7, 5}, {-3});
2545
2546 test_flip({10, 7, 5}, {0, 1});
2547 test_flip({10, 7, 5}, {0, 2});
2548 test_flip({10, 7, 5}, {1, 2});
2549
2550 test_flip({10, 7, 5}, {2, 1, 0});
2551 }
2552
2553 TEST_F(VulkanAPITest, flip_4d) {
2554 test_flip({2, 9, 1, 1}, {-1});
2555 test_flip({7, 5, 9, 3}, {-2});
2556 test_flip({3, 8, 5, 2}, {-3});
2557 test_flip({7, 9, 5, 3}, {-4});
2558
2559 test_flip({10, 7, 5, 6}, {0, 1});
2560 test_flip({10, 7, 5, 6}, {0, 2});
2561 test_flip({10, 7, 5, 6}, {0, 3});
2562 test_flip({10, 7, 5, 6}, {1, 2});
2563 test_flip({10, 7, 5, 6}, {1, 3});
2564 test_flip({10, 7, 5, 6}, {2, 3});
2565
2566 test_flip({10, 7, 5, 6}, {0, 1, 2});
2567 test_flip({10, 7, 5, 6}, {0, 1, 3});
2568 test_flip({10, 7, 5, 6}, {0, 2, 3});
2569 test_flip({10, 7, 5, 6}, {3, 2, 1});
2570
2571 test_flip({10, 7, 5, 6}, {3, 2, 1, 0});
2572 }
2573
2574 TEST_F(VulkanAPITest, gelu) {
2575 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
2576 const auto in_vulkan = in_cpu.vulkan();
2577
2578 auto out_cpu = at::gelu(in_cpu, "tanh");
2579 auto out_vulkan = at::gelu(in_vulkan, "tanh");
2580
2581 auto check = almostEqual(out_cpu, out_vulkan.cpu());
2582
2583 if (!check) {
2584 showRtol(out_cpu, out_vulkan.cpu());
2585 }
2586
2587 ASSERT_TRUE(check);
2588 }
2589
2590 TEST_F(VulkanAPITest, gelu_) {
2591 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
2592 auto vulkan = cpu.vulkan();
2593
2594 at::gelu_(cpu, "tanh");
2595 at::gelu_(vulkan, "tanh");
2596
2597 auto check = almostEqual(cpu, vulkan.cpu());
2598 if (!check) {
2599 showRtol(cpu, vulkan.cpu());
2600 }
2601
2602 ASSERT_TRUE(check);
2603 }
2604
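// at::glu splits the tensor into two halves along the given dimension (here
// dim = 1, the channel dimension), so every input_shape below uses an even
// channel count.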
2605 void test_glu(const at::IntArrayRef input_shape) {
2606 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2607 const auto in_vulkan = in_cpu.vulkan();
2608
2609 const auto out_cpu = at::glu(in_cpu, 1);
2610 const auto out_vulkan = at::glu(in_vulkan, 1);
2611
2612 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2613 if (!check) {
2614 showRtol(out_cpu, out_vulkan.cpu());
2615 }
2616
2617 ASSERT_TRUE(check);
2618 }
2619
2620 TEST_F(VulkanAPITest, glu_ch_200) {
2621 test_glu({17, 200, 302, 5});
2622 }
2623
2624 TEST_F(VulkanAPITest, glu_ch_64) {
2625 test_glu({1, 64, 100, 8});
2626 }
2627
2628 TEST_F(VulkanAPITest, glu_ch_32) {
2629 test_glu({1, 32, 100, 19});
2630 }
2631
2632 // Re-enable once glu_channel shader is fixed
2633 TEST_F(VulkanAPITest, DISABLED_glu_ch_10) {
2634 test_glu({17, 10, 57, 41});
2635 }
2636
2637 // Re-enable once glu_channel shader is fixed
2638 TEST_F(VulkanAPITest, DISABLED_glu_ch_2) {
2639 test_glu({1, 2, 100, 40});
2640 }
2641
2642 TEST_F(VulkanAPITest, hardsigmoid) {
2643 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat))*12 - 6;
2644 const auto in_vulkan = in_cpu.vulkan();
2645
2646 const auto out_cpu = at::hardsigmoid(in_cpu);
2647 const auto out_vulkan = at::hardsigmoid(in_vulkan);
2648
2649 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2650 if (!check) {
2651 showRtol(out_cpu, out_vulkan.cpu());
2652 }
2653
2654 ASSERT_TRUE(check);
2655 }
2656
2657 TEST_F(VulkanAPITest, hardsigmoid_) {
2658 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat))*12 - 6;
2659 auto vulkan = cpu.vulkan();
2660
2661 at::hardsigmoid_(cpu);
2662 at::hardsigmoid_(vulkan);
2663
2664 const auto check = almostEqual(cpu, vulkan.cpu());
2665 if (!check) {
2666 showRtol(cpu, vulkan.cpu());
2667 }
2668
2669 ASSERT_TRUE(check);
2670 }
2671
2672 TEST_F(VulkanAPITest, hardshrink) {
2673 for (const auto lambd_value : {-4.2, -1.0, 0.42, 1.0, 4.2, 13.7}) {
2674 // Generate values between -10 and +10
2675 const auto in_cpu = (at::rand({3, 63, 79, 17}, at::device(at::kCPU).dtype(at::kFloat)) - 0.5) * 20;
2676 const auto in_vulkan = in_cpu.vulkan();
2677
2678 const auto out_vulkan = at::hardshrink(in_vulkan, lambd_value);
2679
2680 const auto check = checkHardShrink(in_cpu, out_vulkan.cpu(), lambd_value);
2681 ASSERT_TRUE(check);
2682 }
2683 }
2684
2685 TEST_F(VulkanAPITest, hardshrink_) {
2686 for (const auto lambd_value : {0.42, 1.0, 4.2, 13.7}) {
2687 // Generate values between -10 and +10
2688 const auto in_cpu = (at::rand({3, 63, 79, 17}, at::device(at::kCPU).dtype(at::kFloat)) - 0.5) * 20;
2689 const auto in_vulkan = in_cpu.vulkan();
2690
2691 const auto out_cpu = in_cpu.hardshrink(lambd_value);
2692 const auto out_vulkan = in_vulkan.hardshrink(lambd_value).cpu();
2693
2694 const auto check = checkHardShrink(out_cpu, out_vulkan, lambd_value);
2695 ASSERT_TRUE(check);
2696 }
2697 }
2698
2699 TEST_F(VulkanAPITest, hardtanh) {
2700 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 10;
2701 const auto in_vulkan = in_cpu.vulkan();
2702
2703 const auto out_cpu = at::hardtanh(in_cpu, 3, 7);
2704 const auto out_vulkan = at::hardtanh(in_vulkan, 3, 7);
2705
2706 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2707 if (!check) {
2708 showRtol(out_cpu, out_vulkan.cpu());
2709 }
2710
2711 ASSERT_TRUE(check);
2712 }
2713
2714 TEST_F(VulkanAPITest, hardtanh_) {
2715 auto a_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 10;
2716 auto a_vulkan = a_cpu.vulkan();
2717
2718 at::hardtanh_(a_cpu, 3, 7);
2719 at::hardtanh_(a_vulkan, 3, 7);
2720
2721 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
2722 if (!check) {
2723 showRtol(a_cpu, a_vulkan.cpu());
2724 }
2725
2726 ASSERT_TRUE(check);
2727 }
2728
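// Prepacked layer norm: packs weight, bias and eps via
// vulkan_prepack::create_layernorm_context, runs via
// vulkan_prepack::run_layernorm_context, and compares against at::layer_norm
// on CPU.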
2729 void test_packed_layer_norm(
2730 const at::IntArrayRef input_shape,
2731 const at::IntArrayRef normalized_shape,
2732 const at::IntArrayRef weight_shape,
2733 const at::IntArrayRef bias_shape,
2734 const float eps) {
2735 c10::InferenceMode mode;
2736
2737 const auto input_cpu =
2738 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2739 const auto input_vulkan = input_cpu.vulkan();
2740
2741 const auto weight_cpu =
2742 at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
2743
2744 const auto bias_cpu =
2745 at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
2746
2747 const auto output_cpu = at::layer_norm(
2748 input_cpu, normalized_shape, weight_cpu, bias_cpu, eps, false);
2749
2750 auto prepack = callOpByName(
2751 "vulkan_prepack::create_layernorm_context",
2752 "",
2753 weight_cpu, bias_cpu, eps);
2754
2755 auto vulkan_output = callOpByName(
2756 "vulkan_prepack::run_layernorm_context",
2757 "",
2758 input_cpu.vulkan(), normalized_shape, prepack[0]);
2759
2760 auto output_vulkan = vulkan_output[0].toTensor();
2761
2762 const auto check = almostEqual(output_cpu, output_vulkan.cpu());
2763 if (!check) {
2764 showRtol(output_cpu, output_vulkan.cpu());
2765 }
2766
2767 ASSERT_TRUE(check);
2768 }
2769
2770 TEST_F(VulkanAPITest, packed_layer_norm_2d) {
2771 test_packed_layer_norm({5, 7}, {7}, {7}, {7}, 1e-05);
2772 test_packed_layer_norm({5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2773 }
2774
2775 TEST_F(VulkanAPITest, packed_layer_norm_3d) {
2776 test_packed_layer_norm({11, 5, 7}, {7}, {7}, {7}, 1e-05);
2777 test_packed_layer_norm({11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2778 test_packed_layer_norm({11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2779 }
2780
2781 TEST_F(VulkanAPITest, packed_layer_norm_4d) {
2782 test_packed_layer_norm({3, 11, 5, 7}, {7}, {7}, {7}, 1e-05);
2783 test_packed_layer_norm({3, 11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2784 test_packed_layer_norm({3, 11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2785 test_packed_layer_norm(
2786 {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, 1e-05);
2787 }
2788
2789 TEST_F(VulkanAPITest, layer_norm_invalid_inputs) {
2790 c10::InferenceMode mode;
2791
2792 // Act: incorrect normalized shape
2793 EXPECT_THROW({
2794 at::layer_norm(
2795 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2796 {8, 5},
2797 at::rand({8, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2798 at::rand({8, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2799 1e-05,
2800 false);
2801 }, ::std::exception);
2802
2803 // Act: incorrect weight dimensions
2804 EXPECT_THROW({
2805 at::layer_norm(
2806 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2807 {3, 5, 7},
2808 at::rand({3, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2809 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2810 1e-05,
2811 false);
2812 }, ::std::exception);
2813
2814 // Act: incorrect bias dimensions
2815 EXPECT_THROW({
2816 at::layer_norm(
2817 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2818 {3, 5, 7},
2819 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2820 at::rand({5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2821 1e-05,
2822 false);
2823 }, ::std::exception);
2824
2825 // Act: input has too many dimensions
2826 EXPECT_THROW({
2827 at::layer_norm(
2828 at::rand({1, 2, 3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2829 {3, 5, 7},
2830 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2831 at::rand({3, 5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
2832 1e-05,
2833 false);
2834 }, ::std::exception);
2835 }
2836
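// Runs at::layer_norm directly on Vulkan tensors (no prepacking) and compares
// against the CPU reference.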
2837 void test_layer_norm(
2838 const at::IntArrayRef input_shape,
2839 const at::IntArrayRef normalized_shape,
2840 const at::IntArrayRef weight_shape,
2841 const at::IntArrayRef bias_shape,
2842 const float eps) {
2843 c10::InferenceMode mode;
2844
2845 const auto input_cpu =
2846 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2847 const auto input_vulkan = input_cpu.vulkan();
2848
2849 const auto weight_cpu =
2850 at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
2851 const auto weight_vulkan = weight_cpu.vulkan();
2852
2853 const auto bias_cpu =
2854 at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
2855 const auto bias_vulkan = bias_cpu.vulkan();
2856
2857 const auto output_cpu = at::layer_norm(
2858 input_cpu, normalized_shape, weight_cpu, bias_cpu, eps, false);
2859 const auto output_vulkan = at::layer_norm(
2860 input_vulkan, normalized_shape, weight_vulkan, bias_vulkan, eps, false);
2861
2862 const auto check = almostEqual(output_cpu, output_vulkan.cpu());
2863 if (!check) {
2864 showRtol(output_cpu, output_vulkan.cpu());
2865 }
2866
2867 ASSERT_TRUE(check);
2868 }
2869
2870 TEST_F(VulkanAPITest, layer_norm_2d) {
2871 test_layer_norm({5, 7}, {7}, {7}, {7}, 1e-05);
2872 test_layer_norm({5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2873 }
2874
2875 TEST_F(VulkanAPITest, layer_norm_3d) {
2876 test_layer_norm({11, 5, 7}, {7}, {7}, {7}, 1e-05);
2877 test_layer_norm({11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2878 test_layer_norm({11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2879 }
2880
2881 TEST_F(VulkanAPITest, layer_norm_4d) {
2882 test_layer_norm({3, 11, 5, 7}, {7}, {7}, {7}, 1e-05);
2883 test_layer_norm({3, 11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2884 test_layer_norm({3, 11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2885 test_layer_norm(
2886 {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, 1e-05);
2887 }
2888
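// at::native_layer_norm returns a tuple of (output, mean, rstd); all three
// outputs are compared against the CPU reference.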
2889 void test_native_layer_norm(
2890 const at::IntArrayRef input_shape,
2891 const at::IntArrayRef normalized_shape,
2892 const at::IntArrayRef weight_shape,
2893 const at::IntArrayRef bias_shape,
2894 const float eps) {
2895 c10::InferenceMode mode;
2896
2897 const auto input_cpu =
2898 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
2899 const auto input_vulkan = input_cpu.vulkan();
2900
2901 const auto weight_cpu =
2902 at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
2903 const auto weight_vulkan = weight_cpu.vulkan();
2904
2905 const auto bias_cpu =
2906 at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));
2907 const auto bias_vulkan = bias_cpu.vulkan();
2908
2909 const auto output_cpu = at::native_layer_norm(
2910 input_cpu, normalized_shape, weight_cpu, bias_cpu, eps);
2911 const auto output_vulkan = at::native_layer_norm(
2912 input_vulkan, normalized_shape, weight_vulkan, bias_vulkan, eps);
2913
2914 const auto check0 =
2915 almostEqual(std::get<0>(output_cpu), std::get<0>(output_vulkan).cpu());
2916 const auto check1 =
2917 almostEqual(std::get<1>(output_cpu), std::get<1>(output_vulkan).cpu());
2918 const auto check2 =
2919 almostEqual(std::get<2>(output_cpu), std::get<2>(output_vulkan).cpu());
2920
2921 if (!check0) {
2922 std::cout
2923 << "the first output of native_layer_norm: layer_norm is incorrect"
2924 << std::endl;
2925 showRtol(std::get<0>(output_cpu), std::get<0>(output_vulkan).cpu());
2926 }
2927 if (!check1) {
2928 std::cout << "the second output of native_layer_norm: mean is incorrect"
2929 << std::endl;
2930 showRtol(std::get<1>(output_cpu), std::get<1>(output_vulkan).cpu());
2931 }
2932 if (!check2) {
2933 std::cout
2934 << "the third output of native_layer_norm: 1/sqrt(var+eps) is incorrect"
2935 << std::endl;
2936 showRtol(std::get<2>(output_cpu), std::get<2>(output_vulkan).cpu());
2937 }
2938
2939 ASSERT_TRUE(check0 && check1 && check2);
2940 }
2941
2942 TEST_F(VulkanAPITest, native_layer_norm_2d) {
2943 test_native_layer_norm({5, 7}, {7}, {7}, {7}, 1e-05);
2944 test_native_layer_norm({5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2945 }
2946
2947 TEST_F(VulkanAPITest, native_layer_norm_3d) {
2948 test_native_layer_norm({11, 5, 7}, {7}, {7}, {7}, 1e-05);
2949 test_native_layer_norm({11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2950 test_native_layer_norm({11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2951 }
2952
2953 TEST_F(VulkanAPITest, native_layer_norm_4d) {
2954 test_native_layer_norm({3, 11, 5, 7}, {7}, {7}, {7}, 1e-05);
2955 test_native_layer_norm({3, 11, 5, 7}, {5, 7}, {5, 7}, {5, 7}, 1e-05);
2956 test_native_layer_norm(
2957 {3, 11, 5, 7}, {11, 5, 7}, {11, 5, 7}, {11, 5, 7}, 1e-05);
2958 test_native_layer_norm(
2959 {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, {3, 11, 5, 7}, 1e-05);
2960 }
2961
2962 TEST_F(VulkanAPITest, leaky_relu) {
2963 for (const auto negative_slope : {0.01, 0.001, 1.0, -0.001}) {
2964 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
2965 const auto in_vulkan = in_cpu.vulkan();
2966
2967 const auto out_cpu = at::leaky_relu(in_cpu, negative_slope);
2968 const auto out_vulkan = at::leaky_relu(in_vulkan, negative_slope);
2969
2970 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
2971
2972 if (!check) {
2973 showRtol(out_cpu, out_vulkan.cpu());
2974 }
2975
2976 ASSERT_TRUE(check);
2977 }
2978 }
2979
2980 TEST_F(VulkanAPITest, leaky_relu_) {
2981 for (const auto negative_slope : {0.01, 0.001, 1.0, -0.001}) {
2982 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
2983 auto vulkan = cpu.vulkan();
2984
2985 at::leaky_relu_(cpu, negative_slope);
2986 at::leaky_relu_(vulkan, negative_slope);
2987
2988 const auto check = almostEqual(cpu, vulkan.cpu());
2989 if (!check) {
2990 showRtol(cpu, vulkan.cpu());
2991 }
2992
2993 ASSERT_TRUE(check);
2994 }
2995 }
2996
2997 TEST_F(VulkanAPITest, lerp) {
2998 const auto a_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
2999 const auto a_vulkan = a_cpu.vulkan();
3000
3001 const auto b_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
3002 const auto b_vulkan = b_cpu.vulkan();
3003
3004 const auto w_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
3005 const auto w_vulkan = w_cpu.vulkan();
3006
3007 const auto c_cpu = at::lerp(a_cpu, b_cpu, w_cpu);
3008 const auto c_vulkan = at::lerp(a_vulkan, b_vulkan, w_vulkan);
3009
3010 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3011 if (!check) {
3012 showRtol(c_cpu, c_vulkan.cpu());
3013 }
3014
3015 ASSERT_TRUE(check);
3016 }
3017
3018 TEST_F(VulkanAPITest, lerp_broadcast0) {
3019 const auto a_cpu = at::rand({3, 5, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3020 const auto a_vulkan = a_cpu.vulkan();
3021
3022 const auto b_cpu = at::rand({3, 5, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
3023 const auto b_vulkan = b_cpu.vulkan();
3024
3025 const auto w_cpu = at::rand({3, 5, 1, 221}, at::device(at::kCPU).dtype(at::kFloat));
3026 const auto w_vulkan = w_cpu.vulkan();
3027
3028 const auto c_cpu = at::lerp(a_cpu, b_cpu, w_cpu);
3029 const auto c_vulkan = at::lerp(a_vulkan, b_vulkan, w_vulkan);
3030
3031 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3032 if (!check) {
3033 showRtol(c_cpu, c_vulkan.cpu());
3034 }
3035
3036 ASSERT_TRUE(check);
3037 }
3038
3039 TEST_F(VulkanAPITest, lerp_broadcast1) {
3040 const auto a_cpu = at::rand({3, 4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3041 const auto a_vulkan = a_cpu.vulkan();
3042
3043 const auto b_cpu = at::rand({4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3044 const auto b_vulkan = b_cpu.vulkan();
3045
3046 const auto w_cpu = at::rand({4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3047 const auto w_vulkan = w_cpu.vulkan();
3048
3049 const auto c_cpu = at::lerp(a_cpu, b_cpu, w_cpu);
3050 const auto c_vulkan = at::lerp(a_vulkan, b_vulkan, w_vulkan);
3051
3052 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3053 if (!check) {
3054 showRtol(c_cpu, c_vulkan.cpu());
3055 }
3056
3057 ASSERT_TRUE(check);
3058 }
3059
3060 TEST_F(VulkanAPITest, lerp_) {
3061 auto a_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3062 auto a_vulkan = a_cpu.vulkan();
3063
3064 const auto b_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3065 const auto b_vulkan = b_cpu.vulkan();
3066
3067 const auto w_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3068 const auto w_vulkan = w_cpu.vulkan();
3069
3070 a_cpu.lerp_(b_cpu, w_cpu);
3071 a_vulkan.lerp_(b_vulkan, w_vulkan);
3072
3073 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3074 if (!check) {
3075 showRtol(a_cpu, a_vulkan.cpu());
3076 }
3077
3078 ASSERT_TRUE(check);
3079 }
3080
3081 TEST_F(VulkanAPITest, lerp_broadcast0_) {
3082 auto a_cpu = at::rand({3, 5, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3083 auto a_vulkan = a_cpu.vulkan();
3084
3085 const auto b_cpu = at::rand({3, 5, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
3086 const auto b_vulkan = b_cpu.vulkan();
3087
3088 const auto w_cpu = at::rand({3, 5, 1, 221}, at::device(at::kCPU).dtype(at::kFloat));
3089 const auto w_vulkan = w_cpu.vulkan();
3090
3091 a_cpu.lerp_(b_cpu, w_cpu);
3092 a_vulkan.lerp_(b_vulkan, w_vulkan);
3093
3094 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3095 if (!check) {
3096 showRtol(a_cpu, a_vulkan.cpu());
3097 }
3098
3099 ASSERT_TRUE(check);
3100 }
3101
3102 TEST_F(VulkanAPITest, lerp_broadcast1_) {
3103 auto a_cpu = at::rand({3, 4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3104 auto a_vulkan = a_cpu.vulkan();
3105
3106 const auto b_cpu = at::rand({4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3107 const auto b_vulkan = b_cpu.vulkan();
3108
3109 const auto w_cpu = at::rand({4, 179, 221}, at::device(at::kCPU).dtype(at::kFloat));
3110 const auto w_vulkan = w_cpu.vulkan();
3111
3112 a_cpu.lerp_(b_cpu, w_cpu);
3113 a_vulkan.lerp_(b_vulkan, w_vulkan);
3114
3115 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3116 if (!check) {
3117 showRtol(a_cpu, a_vulkan.cpu());
3118 }
3119
3120 ASSERT_TRUE(check);
3121 }
3122
3123 TEST_F(VulkanAPITest, lerp_scalar) {
3124 const auto a_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
3125 const auto a_vulkan = a_cpu.vulkan();
3126
3127 const auto b_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
3128 const auto b_vulkan = b_cpu.vulkan();
3129
3130 const float w_scalar = 3.1415f;
3131
3132 const auto c_cpu = at::lerp(a_cpu, b_cpu, w_scalar);
3133 const auto c_vulkan = at::lerp(a_vulkan, b_vulkan, w_scalar);
3134
3135 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3136 if (!check) {
3137 showRtol(c_cpu, c_vulkan.cpu());
3138 }
3139
3140 ASSERT_TRUE(check);
3141 }
3142
3143 TEST_F(VulkanAPITest, lerp_scalar_) {
3144 auto a_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
3145 auto a_vulkan = a_cpu.vulkan();
3146
3147 const auto b_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
3148 const auto b_vulkan = b_cpu.vulkan();
3149
3150 const float w_scalar = 3.1415f;
3151
3152 a_cpu.lerp_(b_cpu, w_scalar);
3153 a_vulkan.lerp_(b_vulkan, w_scalar);
3154
3155 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3156 if (!check) {
3157 showRtol(a_cpu, a_vulkan.cpu());
3158 }
3159
3160 ASSERT_TRUE(check);
3161 }
3162
3163 TEST_F(VulkanAPITest, hardswish) {
3164 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat))*12 - 6;
3165 const auto in_vulkan = in_cpu.vulkan();
3166
3167 const auto out_cpu = at::hardswish(in_cpu);
3168 const auto out_vulkan = at::hardswish(in_vulkan);
3169
3170 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3171 if (!check) {
3172 showRtol(out_cpu, out_vulkan.cpu());
3173 }
3174
3175 ASSERT_TRUE(check);
3176 }
3177
3178 TEST_F(VulkanAPITest, threshold) {
3179 const auto in_cpu = at::rand({2, 11, 57, 23}, at::device(at::kCPU).dtype(at::kFloat))*12 - 6;
3180 const auto in_vulkan = in_cpu.vulkan();
3181
3182 const float threshold = 2.0f;
3183 const float value = 5.0f;
3184
3185 const auto out_cpu = at::threshold(in_cpu, threshold, value);
3186 const auto out_vulkan = at::threshold(in_vulkan, threshold, value);
3187
3188 const auto check = checkThreshold(out_cpu, out_vulkan.cpu(), threshold, value);
3189 ASSERT_TRUE(check);
3190 }
3191
3192 TEST_F(VulkanAPITest, hardswish_) {
3193 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat))*12 - 6;
3194 auto vulkan = cpu.vulkan();
3195
3196 at::hardswish_(cpu);
3197 at::hardswish_(vulkan);
3198
3199 const auto check = almostEqual(cpu, vulkan.cpu());
3200 if (!check) {
3201 showRtol(cpu, vulkan.cpu());
3202 }
3203
3204 ASSERT_TRUE(check);
3205 }
3206
3207 TEST_F(VulkanAPITest, masked_fill_invalidinputs_exceptions) {
3208 // Arrange: Vulkan masked_fill expects inputs of dim <= 4
3209 {
3210 const auto in_cpu =
3211 at::rand({3, 5, 2, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
3212 const auto mask_cpu =
3213 at::randint(0, 2, {2, 3, 2}, at::device(at::kCPU).dtype(at::kBool));
3214
3215 // Act
3216 EXPECT_THROW(
3217 {
3218 const auto out_vulkan =
3219 in_cpu.vulkan().masked_fill(mask_cpu.vulkan(), -7.0f);
3220 ;
3221 },
3222 ::std::exception);
3223 }
3224
3225 // Arrange: Vulkan masked_fill expects mask of dim <= 4
3226 {
3227 const auto in_cpu =
3228 at::rand({2, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
3229 const auto mask_cpu = at::randint(
3230 0, 2, {3, 5, 2, 3, 2}, at::device(at::kCPU).dtype(at::kBool));
3231
3232 // Act
3233 EXPECT_THROW(
3234 {
3235 const auto out_vulkan =
3236 in_cpu.vulkan().masked_fill(mask_cpu.vulkan(), -7.0f);
3237 ;
3238 },
3239 ::std::exception);
3240 }
3241
3242 // Arrange: shapes of input tensor and mask tensor should be broadcastable
3243 {
3244 const auto in_cpu =
3245 at::rand({2, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
3246 const auto mask_cpu =
3247 at::randint(0, 2, {3, 3, 2}, at::device(at::kCPU).dtype(at::kBool));
3248
3249 // Act
3250 EXPECT_THROW(
3251 {
3252 const auto out_vulkan =
3253 in_cpu.vulkan().masked_fill(mask_cpu.vulkan(), -7.0f);
3255 },
3256 ::std::exception);
3257 }
3258
3259 // Arrange: value should be a 0-dimensional value tensor or a scalar
3260 {
3261 const auto in_cpu =
3262 at::rand({2, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
3263 const auto mask_cpu =
3264 at::randint(0, 2, {2, 3, 2}, at::device(at::kCPU).dtype(at::kBool));
3265
3266 // Act
3267 EXPECT_THROW(
3268 {
3269 const auto out_vulkan =
3270 in_cpu.vulkan().masked_fill(mask_cpu.vulkan(), at::rand({1, 2}));
3272 },
3273 ::std::exception);
3274 }
3275 }
3276
3277 void print_shape(const std::vector<int64_t>& shape) {
3278 for (const auto& num : shape) {
3279 std::cout << num << " ";
3280 }
3281 }
3282
3283 void test_masked_fill_scalar(
3284 const at::IntArrayRef input_shape,
3285 const at::IntArrayRef mask_shape) {
3286 c10::InferenceMode mode;
3287
3288 /**
3289 * We test masked_fill by considering all possible broadcasting cases of
3290 * input_shape and mask_shape. The given input_shape and mask_shape are
3291 * identical, e.g. both are equal to [3, 5, 2, 3]. First we truncate all
3292  * possible leading dimensions of input_shape and mask_shape, respectively.
3293 * Denote the results as curr_input_shape and curr_mask_shape, e.g.
3294 * curr_input_shape = [5, 2, 3] and curr_mask_shape = [2, 3]. Then for both
3295 * curr_input_shape and curr_mask_shape we generate all possible subsets of
3296 * the indices and set the corresponding elements to 1 for each subset. For
3297 * example, for curr_input_shape = [5, 2, 3], a possible input_idx_subset =
3298 * [0, 2]. We set the 0th and 2nd elements of curr_input_shape to be 1, then
3299 * curr_input_shape = [1, 2, 1]. Similarly for curr_mask_shape = [2, 3], a
3300 * possible mask_idx_subset = [0], then the updated curr_mask_shape = [1, 3].
3301 * In the end, we test masked_fill with the combinations of curr_input_shape
3302 * and curr_mask_shape. In the example above, an output tensor of shape [1, 2,
3303 * 3] will be generated.
3304 */
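  // Note: gen_all_subsets is defined earlier in this file. Based on how it is
  // called below, it is assumed to append every subset of {0, ..., n-1} to its
  // output vector; e.g. for n = 2 it would produce {}, {0}, {1}, {0, 1}.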
3305 const size_t input_dim = input_shape.size();
3306 const size_t mask_dim = mask_shape.size();
3307 for (int input_shape_id = input_dim - 1; input_shape_id >= 0;
3308 --input_shape_id) {
3309  // truncate input_shape by dropping its leading dimensions
3310 auto curr_input_shape =
3311 input_shape.slice(input_shape_id, input_dim - input_shape_id);
3312
3313 // generate all possible subsets of numbers between 0 and input_dim -
3314 // input_shape_id - 1 (inclusive)
3315 std::vector<std::vector<int64_t>> input_indices_subsets;
3316 std::vector<int64_t> curr_input_indices;
3317 gen_all_subsets(
3318 input_indices_subsets,
3319 input_dim - input_shape_id,
3320 0,
3321 curr_input_indices);
3322
3323 for (auto input_idx_subset : input_indices_subsets) {
3324 // set the elements at indices of the subset of curr_input_shape to 1
3325 auto tmp_curr_input_shape = curr_input_shape.vec();
3326 for (auto input_idx : input_idx_subset) {
3327 tmp_curr_input_shape[input_idx] = 1;
3328 }
3329
3330 for (int mask_shape_id = mask_dim - 1; mask_shape_id >= 0;
3331 --mask_shape_id) {
3332  // truncate mask_shape by dropping its leading dimensions
3333 auto curr_mask_shape =
3334 mask_shape.slice(mask_shape_id, mask_dim - mask_shape_id);
3335
3336 // generate all possible subsets of numbers between 0 and mask_dim -
3337 // mask_shape_id - 1 (inclusive)
3338 std::vector<std::vector<int64_t>> mask_indices_subsets;
3339 std::vector<int64_t> curr_mask_indices;
3340 gen_all_subsets(
3341 mask_indices_subsets,
3342 mask_dim - mask_shape_id,
3343 0,
3344 curr_mask_indices);
3345
3346 for (auto mask_idx_subset : mask_indices_subsets) {
3347 // set the elements at indices of the subset of curr_mask_shape to 1
3348 auto tmp_curr_mask_shape = curr_mask_shape.vec();
3349 for (auto mask_idx : mask_idx_subset) {
3350 tmp_curr_mask_shape[mask_idx] = 1;
3351 }
3352
3353 at::Tensor in_cpu = at::rand(
3354 tmp_curr_input_shape, at::device(at::kCPU).dtype(at::kFloat));
3355 at::Tensor mask_cpu = at::randint(
3356 0, 2, tmp_curr_mask_shape, at::device(at::kCPU).dtype(at::kBool));
3357 at::Tensor out_cpu = in_cpu.masked_fill(mask_cpu, -7.0f);
3358
3359 at::Tensor in_vulkan = in_cpu.vulkan();
3360 at::Tensor mask_vulkan = mask_cpu.vulkan();
3361 at::Tensor out_vulkan = in_vulkan.masked_fill(mask_vulkan, -7.0f);
3362 const bool check = almostEqual(out_cpu, out_vulkan.cpu());
3363
3364 if (!check) {
3365 showRtol(out_cpu, out_vulkan.cpu());
3366 std::cout << "Masked_fill test failed when input is of shape [";
3367 print_shape(tmp_curr_input_shape);
3368 std::cout << "], and mask of shape [";
3369 print_shape(tmp_curr_mask_shape);
3370 std::cout << "]" << std::endl;
3371 }
3372
3373 ASSERT_TRUE(check);
3374 }
3375 }
3376 }
3377 }
3378 }
3379
3380 TEST_F(VulkanAPITest, masked_fill_scalar_mult4ch) {
3381 test_masked_fill_scalar({3, 4, 5, 7}, {3, 4, 5, 7});
3382 }
3383
3384 TEST_F(VulkanAPITest, masked_fill_scalar_nonmult4ch) {
3385 test_masked_fill_scalar({3, 5, 2, 3}, {3, 5, 2, 3});
3386 }
3387
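// Like test_masked_fill_scalar above, but fills with a 0-dimensional value
// tensor (at::scalar_tensor) instead of a bare scalar, using a single fixed
// broadcastable mask shape rather than enumerating all broadcasting cases.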
3388 void test_masked_fill_tensor(
3389 const at::IntArrayRef input_shape,
3390 const at::IntArrayRef mask_shape) {
3391 c10::InferenceMode mode;
3392
3393 at::Tensor in_cpu =
3394 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3395 at::Tensor mask_cpu =
3396 at::randint(0, 2, mask_shape, at::device(at::kCPU).dtype(at::kBool));
3397 at::Tensor out_cpu = in_cpu.masked_fill(mask_cpu, at::scalar_tensor(-7.0f));
3398 at::Tensor in_vulkan = in_cpu.vulkan();
3399 at::Tensor mask_vulkan = mask_cpu.vulkan();
3400 at::Tensor out_vulkan =
3401 in_vulkan.masked_fill(mask_vulkan, at::scalar_tensor(-7.0f));
3402 const bool check = almostEqual(out_cpu, out_vulkan.cpu());
3403 if (!check) {
3404 showRtol(out_cpu, out_vulkan.cpu());
3405 }
3406
3407 ASSERT_TRUE(check);
3408 }
3409
3410 TEST_F(VulkanAPITest, masked_fill_tensor_mult4ch) {
3411 test_masked_fill_tensor({3, 4, 2, 3}, {1, 4, 1, 1});
3412 }
3413
3414 TEST_F(VulkanAPITest, masked_fill_tensor_nonmult4ch) {
3415 test_masked_fill_tensor({3, 5, 2, 3}, {1, 5, 1, 1});
3416 }
3417
3418 TEST_F(VulkanAPITest, max_pool2d) {
3419 c10::InferenceMode mode;
3420
3421 const auto in_cpu = at::rand({5, 13, 55, 68}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
3422 const auto out_cpu = at::max_pool2d(in_cpu, {3, 4}, {2, 1}, {1, 1}, {1, 1}, false);
3423 const auto out_vulkan = at::max_pool2d(in_cpu.vulkan(), {3, 4}, {2, 1}, {1, 1}, {1,1}, false);
3424
3425 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3426 if (!check) {
3427 showRtol(out_cpu, out_vulkan.cpu());
3428 }
3429
3430 ASSERT_TRUE(check);
3431 }
3432
3433
3434 TEST_F(VulkanAPITest, mean_invalid_inputs) {
3435 c10::InferenceMode mode;
3436
3437 // Act: input dimension too large
3438 EXPECT_THROW({
3439 at::mean(at::rand({3, 5, 7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
3440 .vulkan(), {3});
3441 }, ::std::exception);
3442
3443 // Act: dimension out of range
3444 EXPECT_THROW({
3445 at::mean(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
3446 .vulkan(), {3});
3447 }, ::std::exception);
3448
3449 // Act: dimension out of range
3450 EXPECT_THROW({
3451 at::mean(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
3452 .vulkan(), {-4});
3453 }, ::std::exception);
3454
3455 // Act: repeated dimensions
3456 EXPECT_THROW({
3457 at::mean(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
3458 .vulkan(), {1, 1});
3459 }, ::std::exception);
3460
3461 // Act: repeated dimensions
3462 EXPECT_THROW({
3463 at::mean(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
3464 .vulkan(), {1, -2});
3465 }, ::std::exception);
3466 }
3467
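// Helper: reduces a random tensor of the given shape over dim_list with
// at::mean on both the CPU and Vulkan backends and asserts the results match.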
3468 void test_mean_dim(const at::IntArrayRef input_shape, const at::IntArrayRef dim_list, bool keepdim=false) {
3469 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3470 const auto in_vulkan = in_cpu.vulkan();
3471
3472 const auto out_cpu = at::mean(in_cpu, dim_list, keepdim);
3473 const auto out_vulkan = at::mean(in_vulkan, dim_list, keepdim);
3474
3475 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3476 if (!check) {
3477 std::cout << "mean_dim test failed with input shape: "
3478 << input_shape << " and dim_list: " << dim_list << std::endl;
3479 showRtol(out_cpu, out_vulkan.cpu());
3480 }
3481
3482 ASSERT_TRUE(check);
3483 }
3484
3485 TEST_F(VulkanAPITest, mean_dim_2d) {
3486 test_mean_dim({2, 3}, {-1});
3487 test_mean_dim({2, 7}, {-2});
3488 }
3489
3490 TEST_F(VulkanAPITest, mean_dim_3d) {
3491 test_mean_dim({9, 7, 5}, {-1});
3492 test_mean_dim({5, 7, 9}, {-2});
3493 test_mean_dim({5, 7, 9}, {-3});
3494
3495 test_mean_dim({10, 7, 5}, {0, 1});
3496 test_mean_dim({10, 7, 5}, {0, 2});
3497 test_mean_dim({10, 7, 5}, {1, 2});
3498 test_mean_dim({10, 7, 5}, {-1, -2});
3499 test_mean_dim({10, 7, 5}, {0, -2});
3500 }
3501
3502 TEST_F(VulkanAPITest, mean_dim_4d) {
3503 test_mean_dim({7, 9, 6, 5}, {-1});
3504 test_mean_dim({6, 5, 7, 9}, {-2});
3505 test_mean_dim({6, 5, 7, 9}, {-3});
3506 test_mean_dim({6, 5, 7, 9}, {-4});
3507
3508 test_mean_dim({10, 7, 5, 6}, {0, 1});
3509 test_mean_dim({10, 7, 5, 6}, {0, 2});
3510 test_mean_dim({10, 7, 5, 6}, {0, 3});
3511 test_mean_dim({10, 7, 5, 6}, {1, 2});
3512 test_mean_dim({10, 7, 5, 6}, {1, 3});
3513 test_mean_dim({10, 7, 5, 6}, {2, 3});
3514 test_mean_dim({10, 7, 5, 6}, {-2, -4});
3515
3516 test_mean_dim({10, 7, 5, 6}, {0, 1, 2});
3517 test_mean_dim({10, 7, 5, 6}, {0, 1, 3});
3518 test_mean_dim({10, 7, 5, 6}, {0, 2, 3});
3519 test_mean_dim({10, 7, 5, 6}, {3, 2, 1});
3520 test_mean_dim({10, 7, 5, 6}, {3, -2, 1});
3521 test_mean_dim({10, 7, 5, 6}, {-3, -2, -1});
3522 }
3523
3524 TEST_F(VulkanAPITest, mean_dim_keepdim_2d) {
3525 test_mean_dim({5, 7}, {-1}, true);
3526 test_mean_dim({5, 7}, {-2}, true);
3527 }
3528
3529 TEST_F(VulkanAPITest, mean_dim_keepdim_3d) {
3530 test_mean_dim({9, 5, 7}, {-1}, true);
3531 test_mean_dim({5, 9, 7}, {-2}, true);
3532 test_mean_dim({7, 9, 5}, {-3}, true);
3533
3534 test_mean_dim({9, 5, 7}, {0, 1}, true);
3535 test_mean_dim({5, 9, 7}, {0, 2}, true);
3536 test_mean_dim({7, 9, 5}, {1, 2}, true);
3537 }
3538
3539 TEST_F(VulkanAPITest, mean_dim_keepdim_4d) {
3540 test_mean_dim({9, 5, 7, 11}, {-1}, true);
3541 test_mean_dim({5, 9, 11, 7}, {-2}, true);
3542 test_mean_dim({7, 11, 9, 5}, {-3}, true);
3543 test_mean_dim({11, 7, 9, 5}, {-4}, true);
3544
3545 test_mean_dim({9, 5, 7, 11}, {0, 1}, true);
3546 test_mean_dim({5, 9, 11, 7}, {0, 2}, true);
3547 test_mean_dim({7, 11, 9, 5}, {0, 3}, true);
3548 test_mean_dim({11, 7, 9, 5}, {1, 2}, true);
3549 test_mean_dim({9, 5, 7, 11}, {1, 3}, true);
3550 test_mean_dim({5, 9, 11, 7}, {2, 3}, true);
3551
3552 test_mean_dim({7, 11, 9, 5}, {-1, -2, -3}, true);
3553 test_mean_dim({11, 7, 9, 5}, {-1, -2, -4}, true);
3554 test_mean_dim({9, 5, 7, 11}, {-2, -3, -4}, true);
3555 }
3556
3557 TEST_F(VulkanAPITest, mm) {
3558 const auto m1_cpu = at::rand({179, 67}, at::device(at::kCPU).dtype(at::kFloat));
3559 const auto m2_cpu = at::rand({67, 163}, at::device(at::kCPU).dtype(at::kFloat));
3560 const auto out_cpu = m1_cpu.mm(m2_cpu);
3561
3562 const auto m1_vulkan = m1_cpu.vulkan();
3563 const auto out_vulkan = m1_vulkan.mm(m2_cpu);
3564
3565 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3566 if (!check) {
3567 showRtol(out_cpu, out_vulkan.cpu());
3568 }
3569
3570 ASSERT_TRUE(check);
3571 }
3572
3573 TEST_F(VulkanAPITest, mm_m2_is_variable) {
3574 int n = 19;
3575 int p = 25;
3576 int m = 21;
3577 const auto m1_cpu = at::rand({n, p}, at::device(at::kCPU).dtype(at::kFloat));
3578 const auto m2_cpu = at::rand({p, m}, at::device(at::kCPU).dtype(at::kFloat));
3579
3580 const auto out_cpu = m1_cpu.mm(m2_cpu);
3581
3582 const auto m1_vulkan = m1_cpu.vulkan();
3583 const auto m2_vulkan = m2_cpu.vulkan();
3584
3585 const auto out_vulkan = m1_vulkan.mm(m2_vulkan);
3586 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3587 if (!check) {
3588 showRtol(out_cpu, out_vulkan.cpu());
3589 }
3590
3591 ASSERT_TRUE(check);
3592 }
3593
3594 TEST_F(VulkanAPITest, mm_m1_m2_variable) {
3595 int n = 19;
3596 int p = 25;
3597 int m = 21;
3598 const auto m1_cpu = at::rand({n, p}, at::device(at::kCPU).dtype(at::kFloat));
3599 const auto m2_cpu = at::rand({p, m}, at::device(at::kCPU).dtype(at::kFloat));
3600
3601 const auto out_cpu = at::mm(m1_cpu, m2_cpu);
3602
3603 const auto m1_vulkan = m1_cpu.vulkan();
3604 const auto m2_vulkan = m2_cpu.vulkan();
3605
3606 const auto out_vulkan = at::mm(m1_vulkan, m2_vulkan);
3607 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3608 if (!check) {
3609 showRtol(out_cpu, out_vulkan.cpu());
3610 }
3611
3612 ASSERT_TRUE(check);
3613 }
3614
3615 TEST_F(VulkanAPITest, mm_error) {
3616 // mismatched dimensions of m1 and m2.
3617 const auto m1_cpu = at::rand({179, 99}, at::device(at::kCPU).dtype(at::kFloat));
3618 const auto m2_cpu = at::rand({67, 163}, at::device(at::kCPU).dtype(at::kFloat));
3619 const auto m1_vulkan = m1_cpu.vulkan();
3620
3621 EXPECT_THROW(m1_vulkan.mm(m2_cpu), ::std::exception);
3622 }
3623
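// Helper: checks elementwise at::mul parity between the CPU and Vulkan
// backends; the shapes passed by the tests below cover broadcasting on either
// operand as well as a zero-dimensional other tensor.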
3624 void test_mul(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape) {
3625 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3626 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat));
3627
3628 const auto in_vulkan = in_cpu.vulkan();
3629 const auto other_vulkan = other_cpu.vulkan();
3630
3631 const auto out_cpu = at::mul(in_cpu, other_cpu);
3632 const auto out_vulkan = at::mul(in_vulkan, other_vulkan);
3633
3634 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3635 if (!check) {
3636 showRtol(out_cpu, out_vulkan.cpu());
3637 }
3638
3639 ASSERT_TRUE(check);
3640 }
3641
3642 TEST_F(VulkanAPITest, mul) {
3643 test_mul({11, 7, 139, 109}, {11, 7, 139, 109});
3644 }
3645
3646 TEST_F(VulkanAPITest, mul_broadcast0) {
3647 test_mul({3, 5, 1, 1}, {3, 5, 179, 221});
3648 }
3649
3650 TEST_F(VulkanAPITest, mul_broadcast1) {
3651 test_mul({3, 5, 179, 221}, {3, 5, 1, 221});
3652 }
3653
3654 TEST_F(VulkanAPITest, mul_broadcast2) {
3655 test_mul({3, 4, 179, 221}, {4, 1, 1});
3656 }
3657
3658 TEST_F(VulkanAPITest, mul_broadcast3) {
3659 test_mul({3, 4, 179, 221}, {1, 1, 179, 221});
3660 }
3661
3662 TEST_F(VulkanAPITest, mul_broadcast4) {
3663 test_mul({3, 4, 179, 1}, {1, 179, 221});
3664 }
3665
3666 TEST_F(VulkanAPITest, mul_broadcast5) {
3667 test_mul({2, 1, 7, 1}, {1, 5, 1, 4});
3668 }
3669
3670 TEST_F(VulkanAPITest, mul_broadcast6) {
3671 test_mul({1, 15, 5, 4}, {21, 1, 5, 4});
3672 }
3673
3674 TEST_F(VulkanAPITest, mul_zero_dim) {
3675 test_mul({1, 15, 5, 4}, {});
3676 }
3677
3678 TEST_F(VulkanAPITest, mul_) {
3679 auto a_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3680 auto a_vulkan = a_cpu.vulkan();
3681
3682 const auto b_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3683 const auto b_vulkan = b_cpu.vulkan();
3684
3685 a_cpu.mul_(b_cpu);
3686 a_vulkan.mul_(b_vulkan);
3687
3688 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3689 if (!check) {
3690  showRtol(a_cpu, a_vulkan.cpu());
3691 }
3692
3693 ASSERT_TRUE(check);
3694 }
3695
3696 TEST_F(VulkanAPITest, mul_broadcast0_) {
3697 auto a_cpu = at::rand({12, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3698 auto a_vulkan = a_cpu.vulkan();
3699
3700 const auto b_cpu = at::rand({12, 17, 29, 1}, at::device(at::kCPU).dtype(at::kFloat));
3701 const auto b_vulkan = b_cpu.vulkan();
3702
3703 a_cpu.mul_(b_cpu);
3704 a_vulkan.mul_(b_vulkan);
3705
3706 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3707 if (!check) {
3708  showRtol(a_cpu, a_vulkan.cpu());
3709 }
3710
3711 ASSERT_TRUE(check);
3712 }
3713
3714 TEST_F(VulkanAPITest, mul_broadcast1_) {
3715 auto a_cpu = at::rand({3, 8, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
3716 auto a_vulkan = a_cpu.vulkan();
3717
3718 const auto b_cpu = at::rand({8, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
3719 const auto b_vulkan = b_cpu.vulkan();
3720
3721 a_cpu.mul_(b_cpu);
3722 a_vulkan.mul_(b_vulkan);
3723
3724 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3725 if (!check) {
3726  showRtol(a_cpu, a_vulkan.cpu());
3727 }
3728
3729 ASSERT_TRUE(check);
3730 }
3731
3732 TEST_F(VulkanAPITest, mul_scalar) {
3733 const auto a_cpu = at::rand({17, 213, 213, 7}, at::device(at::kCPU).dtype(at::kFloat));
3734 const auto a_vulkan = a_cpu.vulkan();
3735
3736 const float b_scalar = 3.1415f;
3737
3738 const auto c_cpu = at::mul(a_cpu, b_scalar);
3739 const auto c_vulkan = at::mul(a_vulkan, b_scalar);
3740
3741 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3742 if (!check) {
3743 showRtol(c_cpu, c_vulkan.cpu());
3744 }
3745
3746 ASSERT_TRUE(check);
3747 }
3748
3749 TEST_F(VulkanAPITest, mul_scalar_) {
3750 auto a_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
3751 auto a_vulkan = a_cpu.vulkan();
3752
3753 const float b_scalar = 3.1415f;
3754
3755 a_cpu.mul_(b_scalar);
3756 a_vulkan.mul_(b_scalar);
3757
3758 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3759 if (!check) {
3760 showRtol(a_cpu, a_vulkan.cpu());
3761 }
3762
3763 ASSERT_TRUE(check);
3764 }
3765
3766 TEST_F(VulkanAPITest, mul_scalar_wrapped) {
3767 if (!at::is_vulkan_available()) {
3768 return;
3769 }
3770
3771 const auto a_cpu = at::rand({17, 213, 213, 7}, at::device(at::kCPU).dtype(at::kFloat));
3772 const auto a_vulkan = a_cpu.vulkan();
3773
3774 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
3775
3776 const auto c_cpu = at::mul(a_cpu, b_scalar);
3777 const auto c_vulkan = at::mul(a_vulkan, b_scalar);
3778
3779 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3780 if (!check) {
3781 showRtol(c_cpu, c_vulkan.cpu());
3782 }
3783
3784 ASSERT_TRUE(check);
3785 }
3786
3787 TEST_F(VulkanAPITest, mul_scalar_wrapped_) {
3788 if (!at::is_vulkan_available()) {
3789 return;
3790 }
3791
3792 auto a_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
3793 auto a_vulkan = a_cpu.vulkan();
3794
3795 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
3796
3797 a_cpu.mul_(b_scalar);
3798 a_vulkan.mul_(b_scalar);
3799
3800 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
3801 if (!check) {
3802 showRtol(a_cpu, a_vulkan.cpu());
3803 }
3804
3805 ASSERT_TRUE(check);
3806 }
3807
3808 TEST_F(VulkanAPITest, mul_to_scalar_wrapped) {
3809 if (!at::is_vulkan_available()) {
3810 return;
3811 }
3812
3813 const auto a = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
3814
3815 const auto b_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
3816 const auto b_vulkan = b_cpu.vulkan();
3817
3818 const auto c_cpu = at::mul(a, b_cpu);
3819 const auto c_vulkan = at::mul(a, b_vulkan);
3820
3821 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
3822 if (!check) {
3823 showRtol(c_cpu, c_vulkan.cpu());
3824 }
3825
3826 ASSERT_TRUE(check);
3827 }
3828
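// Helper: checks at::pow(input, other) parity between the CPU and Vulkan
// backends, including the broadcasting cases exercised by the tests below.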
3829 void test_pow(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape) {
3830 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3831 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat));
3832
3833 const auto in_vulkan = in_cpu.vulkan();
3834 const auto other_vulkan = other_cpu.vulkan();
3835
3836 const auto out_cpu = at::pow(in_cpu, other_cpu);
3837 const auto out_vulkan = at::pow(in_vulkan, other_vulkan);
3838
3839 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3840 if (!check) {
3841 showRtol(out_cpu, out_vulkan.cpu());
3842 std::cout << "pow test failed with input shape: "
3843 << input_shape << " and other shape: " << other_shape << std::endl;
3844 }
3845
3846 ASSERT_TRUE(check);
3847 }
3848
3849 TEST_F(VulkanAPITest, pow) {
3850 test_pow({4}, {4});
3851 test_pow({4, 2}, {4, 2});
3852 test_pow({11, 7, 9}, {11, 7, 9});
3853 test_pow({3, 11, 9, 7}, {3, 11, 9, 7});
3854 }
3855
3856 TEST_F(VulkanAPITest, pow_broadcast) {
3857 // broadcast input
3858 test_pow({1}, {3});
3859 test_pow({1, 1}, {3, 2});
3860 test_pow({2, 1, 3}, {2, 2, 5, 3});
3861 test_pow({1, 1, 4}, {4, 8, 5, 4}); // mul4ch
3862 test_pow({3, 7, 1, 4}, {3, 7, 9, 4});
3863
3864 // broadcast other
3865 test_pow({3}, {1});
3866 test_pow({3, 2}, {1, 2});
3867 test_pow({2, 2, 5, 3}, {2, 1, 3});
3868 test_pow({3, 7, 9, 4}, {3, 7, 1, 4});
3869 test_pow({3, 8, 2, 5}, {1, 1, 2, 5}); // mul4ch
3870
3871 // broadcast both
3872 test_pow({2, 1, 2}, {1, 5, 1});
3873 test_pow({5, 1, 4}, {7, 1, 2, 1});
3874 test_pow({2, 1, 7, 1}, {1, 5, 1, 4});
3875 test_pow({1, 15, 5, 4}, {21, 1, 5, 4});
3876 test_pow({1, 1, 5, 5}, {8, 8, 1, 1}); // mul4ch
3877 }
3878
3879 TEST_F(VulkanAPITest, pow_zero_dim) {
3880  test_pow({1, 15, 5, 4}, {});
3881 }
3882
3883 void test_pow_(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape) {
3884 const auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3885 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat));
3886
3887 const auto vulkan = cpu.vulkan();
3888 const auto other_vulkan = other_cpu.vulkan();
3889
3890 cpu.pow_(other_cpu);
3891 vulkan.pow_(other_vulkan);
3892
3893 const auto check = almostEqual(cpu, vulkan.cpu());
3894 if (!check) {
3895 showRtol(cpu, vulkan.cpu());
3896 std::cout << "pow_ test failed with input shape: "
3897 << input_shape << " and other shape: " << other_shape << std::endl;
3898 }
3899
3900 ASSERT_TRUE(check);
3901 }
3902
3903 TEST_F(VulkanAPITest, pow_) {
3904 test_pow_({4}, {4});
3905 test_pow_({4, 2}, {4, 2});
3906 test_pow_({11, 7, 9}, {11, 7, 9});
3907 test_pow_({3, 11, 9, 7}, {3, 11, 9, 7});
3908 }
3909
3910 TEST_F(VulkanAPITest, pow_broadcast_other_) {
3911 test_pow_({3}, {1});
3912 test_pow_({3, 2}, {1, 2});
3913 test_pow_({2, 2, 5, 3}, {2, 1, 3});
3914 test_pow_({3, 7, 9, 4}, {3, 7, 1, 4});
3915 }
3916
3917 void test_pow_tensor_scalar(const at::IntArrayRef input_shape, const float exp) {
3918 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3919 const auto in_vulkan = in_cpu.vulkan();
3920
3921 const auto out_cpu = at::pow(in_cpu, exp);
3922 const auto out_vulkan = at::pow(in_vulkan, exp);
3923
3924 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3925 if (!check) {
3926 showRtol(out_cpu, out_vulkan.cpu());
3927 std::cout << "pow_tensor_scalar test failed with input shape: "
3928 << input_shape << std::endl;
3929 }
3930
3931 ASSERT_TRUE(check);
3932 }
3933
3934 TEST_F(VulkanAPITest, pow_tensor_scalar) {
3935 test_pow_tensor_scalar({4}, 2.5); // 1d
3936 test_pow_tensor_scalar({4, 2}, -1); // 2d
3937 test_pow_tensor_scalar({11, 7, 9}, 7.7); // 3d
3938 test_pow_tensor_scalar({3, 11, 9, 7}, -0.03); // 4d
3939 }
3940
3941 void test_pow_tensor_scalar_(const at::IntArrayRef input_shape, const float exp) {
3942  // Make sure inputs are non-zero; otherwise negative exponents produce inf and the results cannot be compared.
3943 const auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3944 const auto vulkan = cpu.vulkan();
3945
3946 cpu.pow_(exp);
3947 vulkan.pow_(exp);
3948
3949 const auto check = almostEqual(cpu, vulkan.cpu());
3950 if (!check) {
3951 showRtol(cpu, vulkan.cpu());
3952 std::cout << "pow_scalar_ test failed with input shape: "
3953 << input_shape << std::endl;
3954 }
3955
3956 ASSERT_TRUE(check);
3957 }
3958
3959 TEST_F(VulkanAPITest, pow_tensor_scalar_) {
3960 test_pow_tensor_scalar_({4}, 2.5); // 1d
3961 test_pow_tensor_scalar_({4, 2}, -1); // 2d
3962 test_pow_tensor_scalar_({11, 7, 9}, 7.7); // 3d
3963 test_pow_tensor_scalar_({3, 11, 9, 7}, -0.03); // 4d
3964 }
3965
3966 void test_pow_scalar_tensor(const float base, const at::IntArrayRef other) {
3967 const auto other_cpu = at::rand(other, at::device(at::kCPU).dtype(at::kFloat));
3968 const auto other_vulkan = other_cpu.vulkan();
3969
3970 const auto out_cpu = at::pow(base, other_cpu);
3971 const auto out_vulkan = at::pow(base, other_vulkan);
3972
3973 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
3974 if (!check) {
3975 showRtol(out_cpu, out_vulkan.cpu());
3976 std::cout << "pow_scalar_tensor test failed with other shape: "
3977 << other << std::endl;
3978 }
3979
3980 ASSERT_TRUE(check);
3981 }
3982
3983 TEST_F(VulkanAPITest, pow_scalar_tensor) {
3984 test_pow_scalar_tensor(2.5, {4}); // 1d
3985 test_pow_scalar_tensor(2, {4, 2}); // 2d
3986 test_pow_scalar_tensor(7.7, {11, 7, 9}); // 3d
3987 test_pow_scalar_tensor(3, {3, 11, 9, 7}); // 4d
3988 }
3989
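// Helper: scales a random tensor by input_scale and floor-divides it by the
// scalar `other` on both backends. Exact agreement near floor boundaries is
// not expected, so the check below uses an absolute tolerance of 1.0.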
3990 void test_floor_divide_scalar(const at::IntArrayRef input_shape, float input_scale, float other) {
3991 c10::InferenceMode mode;
3992
3993 auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
3994 in_cpu = at::mul(in_cpu, input_scale);
3995
3996 auto in_vulkan = in_cpu.vulkan();
3997 auto out_vk = at::floor_divide(in_vulkan, other);
3998 auto out_cpu = at::floor_divide(in_cpu, other);
3999
4000  // Max tolerance is 1.0 due to the floor operation.
4001  // We may consider adding an extra check on the number of violations; they should be rare.
4002 const auto check = checkRtol(out_cpu - out_vk.cpu(), 1.0f);
4003 if (!check) {
4004 std::cout << "floor_divide test failed with "
4005 << "scale: " << input_scale
4006 << " other: " << other
4007 << std::endl;
4008 }
4009
4010 ASSERT_TRUE(check);
4011 }
4012
4013 TEST_F(VulkanAPITest, floor_divide_scalar) {
4014 test_floor_divide_scalar({3, 3, 12, 12}, 100.0, 10.0);
4015 test_floor_divide_scalar({12, 12}, 10.0, 3.4);
4016 test_floor_divide_scalar({4, 5, 12, 12}, 100.0, 10.0);
4017 test_floor_divide_scalar({3, 3, 12, 12}, 0.3, 0.08);
4018 }
4019
4020 TEST_F(VulkanAPITest, floor_divide_scalar_error) {
4021 c10::InferenceMode mode;
4022
4023 auto in_cpu = at::rand({2, 3, 4}, at::device(at::kCPU).dtype(at::kFloat));
4024 auto in_vulkan = in_cpu.vulkan();
4025 EXPECT_THROW(at::floor_divide(in_vulkan, 0.0f), ::std::exception);
4026 }
4027
4028 void test_floor_divide_scalar_inplace(const at::IntArrayRef input_shape, float input_scale, float other) {
4029 c10::InferenceMode mode;
4030
4031 auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4032 in_cpu = at::mul(in_cpu, input_scale);
4033 auto in_vk = in_cpu.vulkan();
4034
4035 in_cpu.floor_divide_(other);
4036 in_vk.floor_divide_(other);
4037
4038  // Max tolerance is 1.0 due to the floor operation.
4039  // We may consider adding an extra check on the number of violations; they should be rare.
4040 const auto check = checkRtol(in_cpu - in_vk.cpu(), 1.0f);
4041 if (!check) {
4042 std::cout << "floor_divide test failed with "
4043 << "scale: " << input_scale
4044 << " other: " << other
4045 << std::endl;
4046 }
4047
4048 ASSERT_TRUE(check);
4049 }
4050
4051 TEST_F(VulkanAPITest, floor_divide_scalar_inplace_error) {
4052 c10::InferenceMode mode;
4053
4054 auto in_cpu = at::rand({2, 3, 4}, at::device(at::kCPU).dtype(at::kFloat));
4055 auto in_vulkan = in_cpu.vulkan();
4056  EXPECT_THROW(in_vulkan.floor_divide_(0.0f), ::std::exception);
4057 }
4058
4059 TEST_F(VulkanAPITest, floor_divide_scalar_inplace) {
4060 test_floor_divide_scalar_inplace({3, 3, 12, 12}, 100.0, 10.0);
4061 test_floor_divide_scalar_inplace({12, 12}, 10.0, 3.4);
4062 test_floor_divide_scalar_inplace({4, 5, 12, 12}, 100.0, 10.0);
4063 test_floor_divide_scalar_inplace({3, 3, 12, 12}, 0.3, 0.08);
4064 }
4065
4066 TEST_F(VulkanAPITest, floor_divide_zero_dim_tensor) {
4067 c10::InferenceMode mode;
4068
4069 std::vector<int64_t> input_shape{5, 3, 4, 5};
4070 float input_scale = 100.0;
4071
4072 auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4073 in_cpu = at::mul(in_cpu, input_scale);
4074 auto in_vk = in_cpu.vulkan();
4075
4076 auto other_cpu = at::zeros({}, at::device(at::kCPU).dtype(at::kFloat)) + 10.0f;
4077 auto other_vk = other_cpu.vulkan();
4078
4079 auto out_cpu = at::floor_divide(in_cpu, other_cpu);
4080 auto out_vk = at::floor_divide(in_vk, other_vk);
4081
4082  // Max tolerance is 1.0 due to the floor operation.
4083  // We may consider adding an extra check on the number of violations; they should be rare.
4084 const auto check = checkRtol(out_cpu - out_vk.cpu(), 1.0f);
4085 if (!check) {
4086 std::cout << "floor_divide test failed with "
4087 << "scale: " << input_scale
4088 << std::endl;
4089 }
4090
4091 ASSERT_TRUE(check);
4092 }
4093
4094 TEST_F(VulkanAPITest, floor_divide_tensor) {
4095 c10::InferenceMode mode;
4096
4097 std::vector<int64_t> input_shape{6, 3, 5, 5};
4098 float input_scale = 10.0;
4099
4100 auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4101 in_cpu = at::mul(in_cpu, input_scale);
4102  // "other" is at least 0.5 to avoid rounding errors caused by very small
4103 // values.
4104 auto other_cpu =
4105 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.5;
4106
4107 auto in_vk = in_cpu.vulkan();
4108 auto other_vk = other_cpu.vulkan();
4109
4110 auto out_cpu = at::floor_divide(in_cpu, other_cpu);
4111 auto out_vk = at::floor_divide(in_vk, other_vk);
4112
4113  // Max tolerance is 1.0 due to the floor operation.
4114  // We may consider adding an extra check on the number of violations; they should be rare.
4115 const auto check = checkRtol(out_cpu - out_vk.cpu(), 1.0f);
4116 if (!check) {
4117 std::cout << "floor_divide test failed with "
4118 << "scale: " << input_scale << std::endl;
4119 }
4120
4121 ASSERT_TRUE(check);
4122 }
4123
4124 TEST_F(VulkanAPITest, floor_divide_tensor_inplace) {
4125 c10::InferenceMode mode;
4126
4127 std::vector<int64_t> input_shape{5, 3, 5, 5};
4128 float input_scale = 10.0;
4129
4130 auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4131 in_cpu = at::mul(in_cpu, input_scale);
4132  // "other" is at least 0.5 to avoid rounding errors caused by very small
4133 // values.
4134 auto other_cpu =
4135 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.5;
4136
4137 auto in_vk = in_cpu.vulkan();
4138 auto other_vk = other_cpu.vulkan();
4139
4140 in_cpu.floor_divide_(other_cpu);
4141 in_vk.floor_divide_(other_vk);
4142
4143  // Max tolerance is 1.0 due to the floor operation.
4144  // We may consider adding an extra check on the number of violations; they should be rare.
4145 const auto check = checkRtol(in_cpu - in_vk.cpu(), 1.0f);
4146 if (!check) {
4147 std::cout << "floor_divide test failed with "
4148 << "scale: " << input_scale << std::endl;
4149 }
4150
4151 ASSERT_TRUE(check);
4152 }
4153
4154 TEST_F(VulkanAPITest, relu) {
4155 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
4156 const auto in_vulkan = in_cpu.vulkan();
4157
4158 const auto out_cpu = at::relu(in_cpu);
4159 const auto out_vulkan = at::relu(in_vulkan);
4160
4161 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4162
4163 if (!check) {
4164 showRtol(out_cpu, out_vulkan.cpu());
4165 }
4166
4167 ASSERT_TRUE(check);
4168 }
4169
4170 TEST_F(VulkanAPITest, relu_) {
4171 auto a_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
4172 auto a_vulkan = a_cpu.vulkan();
4173
4174 at::relu_(a_cpu);
4175 at::relu_(a_vulkan);
4176
4177 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4178
4179 if (!check) {
4180 showRtol(a_cpu, a_vulkan.cpu());
4181 }
4182
4183 ASSERT_TRUE(check);
4184 }
4185
4186 TEST_F(VulkanAPITest, reflection_pad2d) {
4187 const auto a_cpu = at::rand({2, 3, 47, 63}, at::device(at::kCPU).dtype(at::kFloat));
4188 const auto a_vulkan = a_cpu.vulkan();
4189
4190 const auto out_cpu = at::reflection_pad2d(a_cpu, {9,8,5,12});
4191 const auto out_vulkan = at::reflection_pad2d(a_vulkan, {9,8,5,12}).cpu();
4192
4193 const auto check = almostEqual(out_cpu, out_vulkan);
4194 if (!check) {
4195 showRtol(out_cpu, out_vulkan);
4196 }
4197
4198 ASSERT_TRUE(check);
4199 }
4200
4201 TEST_F(VulkanAPITest, repeat_invalid_inputs_outputs_exceptions) {
4202 // Arrange: Vulkan repeat only supports input of dims <= 4
4203 {
4204 const auto in_cpu =
4205 at::rand({3, 9, 11, 7, 3}, at::device(at::kCPU).dtype(at::kFloat));
4206 const at::IntArrayRef repeats = {5, 7, 3, 9, 2};
4207
4208 // Act
4209 EXPECT_THROW(
4210 { const auto out_vulkan = in_cpu.vulkan().repeat(repeats); },
4211 ::std::exception);
4212 }
4213
4214  // Arrange: Number of dimensions of repeat dims cannot be smaller than
4215 // number of dimensions of tensor
4216 {
4217 const auto in_cpu =
4218 at::rand({3, 5, 11, 13}, at::device(at::kCPU).dtype(at::kFloat));
4219 const at::IntArrayRef repeats = {5, 7};
4220
4221 // Act
4222 EXPECT_THROW(
4223 { const auto out_vulkan = in_cpu.vulkan().repeat(repeats); },
4224 ::std::exception);
4225 }
4226
4227 // Arrange: Vulkan repeat only supports output of dims <= 4
4228 {
4229 const auto in_cpu =
4230 at::rand({3, 9, 11, 7}, at::device(at::kCPU).dtype(at::kFloat));
4231 const at::IntArrayRef repeats = {5, 7, 3, 9, 2};
4232
4233 // Act
4234 EXPECT_THROW(
4235 { const auto out_vulkan = in_cpu.vulkan().repeat(repeats); },
4236 ::std::exception);
4237 }
4238 }
4239
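// Helper: iterates over prefixes of input_shape and prefixes of `repeats`
// (never shorter than the input prefix), comparing Tensor::repeat between the
// CPU and Vulkan backends for each combination.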
4240 void test_repeat(
4241 const at::IntArrayRef input_shape,
4242 const at::IntArrayRef repeats) {
4243 c10::InferenceMode mode;
4244
4245 at::Tensor in_cpu;
4246 at::Tensor out_cpu;
4247 at::Tensor in_vulkan;
4248 at::Tensor out_vulkan;
4249 at::IntArrayRef repeat;
4250 bool check = true;
4251 for (int idx_input = 1; (unsigned)idx_input < input_shape.size() + 1; ++idx_input) {
4252 for (int idx_repeat = idx_input; (unsigned)idx_repeat < repeats.size() + 1;
4253 ++idx_repeat) {
4254 in_cpu = at::rand(
4255 input_shape.slice(0, idx_input),
4256 at::device(at::kCPU).dtype(at::kFloat));
4257 repeat = repeats.slice(0, idx_repeat);
4258      out_cpu = in_cpu.repeat(repeat);
4259      in_vulkan = in_cpu.vulkan();
4260      out_vulkan = in_vulkan.repeat(repeat);
4261 bool local_check = almostEqual(out_cpu, out_vulkan.cpu());
4262 if (!local_check) {
4263 check = false;
4264 std::cout << "Repeat test failed when input is of shape "
4265 << input_shape.slice(0, idx_input) << " and repeat of "
4266 << repeat << std::endl;
4267 showRtol(out_cpu, out_vulkan.cpu());
4268 }
4269 }
4270 }
4271
4272 ASSERT_TRUE(check);
4273 }
4274
4275 TEST_F(VulkanAPITest, repeat) {
4276 test_repeat({13, 5, 13, 7}, {7, 2, 3, 5});
4277 }
4278
4279 TEST_F(VulkanAPITest, replication_pad2d) {
4280 const auto a_cpu = at::rand({2, 3, 47, 63}, at::device(at::kCPU).dtype(at::kFloat));
4281 const auto a_vulkan = a_cpu.vulkan();
4282
4283 constexpr std::array<int64_t, 4u> padding_params{9, 8, 5, 12};
4284
4285 const auto out_cpu = at::replication_pad2d(a_cpu, padding_params);
4286 const auto out_vulkan = at::replication_pad2d(a_vulkan, padding_params).cpu();
4287
4288 const auto check = almostEqual(out_cpu, out_vulkan);
4289 if (!check) {
4290 showRtol(out_cpu, out_vulkan);
4291 }
4292
4293 ASSERT_TRUE(check);
4294 }
4295
4296 TEST_F(VulkanAPITest, reshape) {
4297 c10::InferenceMode mode;
4298
4299 const auto in_cpu = at::rand({7, 11, 8, 9}, at::device(at::kCPU).dtype(at::kFloat));
4300 const auto in_vulkan = in_cpu.vulkan();
4301
4302 const std::array<int64_t, 2> shape{7 * 8, 11 * 9};
4303
4304 const auto out_cpu = at::reshape(in_cpu, shape);
4305 const auto out_vulkan = at::reshape(in_vulkan, shape);
4306
4307 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4308 if (!check) {
4309 showRtol(out_cpu, out_vulkan.cpu());
4310 }
4311
4312 ASSERT_TRUE(check);
4313 }
4314
4315 TEST_F(VulkanAPITest, reshape_) {
4316 c10::InferenceMode mode;
4317
4318  auto cpu = at::rand({9, 4, 12, 6}, at::device(at::kCPU).dtype(at::kFloat));
4319  auto vulkan = cpu.vulkan();
4320
4321 const std::array<int64_t, 3> shape{9, 4 * 6, 12};
4322
4323  cpu = cpu.reshape(shape);
4324  vulkan = vulkan.reshape(shape);
4325
4326 const auto check = almostEqual(cpu, vulkan.cpu());
4327 if (!check) {
4328 showRtol(cpu, vulkan.cpu());
4329 }
4330
4331 ASSERT_TRUE(check);
4332 }
4333
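// Helper: compares at::select(input, dim, index) between the CPU and Vulkan
// backends for a random tensor of the given shape.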
4334 void test_select(const at::IntArrayRef input_shape, int64_t dim, int64_t index) {
4335 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4336 const auto out_cpu = at::select(in_cpu, dim, index);
4337
4338 const auto in_vulkan = in_cpu.vulkan();
4339 const auto out_vulkan = at::select(in_vulkan, dim, index);
4340
4341 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4342 if (!check) {
4343 showRtol(out_cpu, out_vulkan.cpu());
4344 }
4345
4346 ASSERT_TRUE(check);
4347 }
4348
4349 TEST_F(VulkanAPITest, select_3d_depth_small) {
4350 test_select({1, 1, 1}, 0, 0);
4351 }
4352
4353 TEST_F(VulkanAPITest, select_3d_depth_medium) {
4354 test_select({3, 2, 5}, 0, 2);
4355 }
4356
4357 TEST_F(VulkanAPITest, select_3d_depth_large) {
4358 test_select({100, 1, 144}, 0, 50);
4359 }
4360
4361 TEST_F(VulkanAPITest, select_3d_height_small) {
4362 test_select({1, 1, 1}, 1, 0);
4363 }
4364
4365 TEST_F(VulkanAPITest, select_3d_height_medium) {
4366 test_select({3, 5, 2}, 1, 2);
4367 }
4368
4369 TEST_F(VulkanAPITest, select_3d_height_medium1) {
4370 test_select({16, 16, 5}, 1, 6);
4371 }
4372
4373 TEST_F(VulkanAPITest, select_3d_height_medium2) {
4374 test_select({17, 17, 5}, 1, 6);
4375 }
4376
4377 TEST_F(VulkanAPITest, select_3d_height_large) {
4378 test_select({100, 144, 5}, 1, 50);
4379 }
4380
4381 TEST_F(VulkanAPITest, select_3d_width_small) {
4382 test_select({1, 1, 1}, 2, 0);
4383 }
4384
4385 TEST_F(VulkanAPITest, select_3d_width_medium) {
4386 test_select({3, 5, 3}, 2, 2);
4387 }
4388
4389 TEST_F(VulkanAPITest, select_3d_width_medium2) {
4390 test_select({17, 17, 8}, 2, 6);
4391 }
4392
4393 TEST_F(VulkanAPITest, select_3d_width_large) {
4394 test_select({100, 3, 144}, 2, 50);
4395 }
4396
4397 TEST_F(VulkanAPITest, select_4d_batch_small) {
4398 test_select({1, 1, 1, 1}, 0, 0);
4399 }
4400
4401 TEST_F(VulkanAPITest, select_4d_batch_medium) {
4402 test_select({3, 2, 5, 4}, 0, 1);
4403 }
4404
4405 TEST_F(VulkanAPITest, select_4d_batch_large) {
4406 test_select({30, 8, 12, 17}, 0, 27);
4407 }
4408
4409 TEST_F(VulkanAPITest, select_4d_depth_small) {
4410 test_select({1, 1, 1, 1}, 1, 0);
4411 }
4412
4413 TEST_F(VulkanAPITest, select_4d_depth_medium) {
4414 test_select({7, 5, 2, 4}, 1, 4);
4415 }
4416
4417 TEST_F(VulkanAPITest, select_4d_depth_large) {
4418 test_select({5, 30, 12, 30}, 1, 23);
4419 }
4420
4421 TEST_F(VulkanAPITest, select_4d_height_small) {
4422 test_select({1, 1, 1, 1}, 2, 0);
4423 }
4424
4425 TEST_F(VulkanAPITest, select_4d_height_medium) {
4426 test_select({3, 5, 4, 2}, 2, 3);
4427 }
4428
4429 TEST_F(VulkanAPITest, select_4d_height_large) {
4430 test_select({5, 8, 50, 50}, 2, 41);
4431 }
4432
4433 TEST_F(VulkanAPITest, select_4d_width_small) {
4434 test_select({1, 1, 1, 1}, 3, 0);
4435 }
4436
4437 TEST_F(VulkanAPITest, select_4d_width_medium) {
4438 test_select({3, 5, 4, 2}, 3, 1);
4439 }
4440
4441 TEST_F(VulkanAPITest, select_4d_width_large) {
4442 test_select({5, 8, 50, 50}, 3, 33);
4443 }
4444
4445 TEST_F(VulkanAPITest, sigmoid) {
4446 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
4447 const auto in_vulkan = in_cpu.vulkan();
4448
4449 const auto out_cpu = at::sigmoid(in_cpu);
4450 const auto out_vulkan = at::sigmoid(in_vulkan);
4451
4452 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4453 if (!check) {
4454 showRtol(out_cpu, out_vulkan.cpu());
4455 }
4456
4457 ASSERT_TRUE(check);
4458 }
4459
4460 TEST_F(VulkanAPITest, sigmoid_) {
4461 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
4462 auto vulkan = cpu.vulkan();
4463
4464 at::sigmoid_(cpu);
4465 at::sigmoid_(vulkan);
4466
4467 const auto check = almostEqual(cpu, vulkan.cpu());
4468 if (!check) {
4469 showRtol(cpu, vulkan.cpu());
4470 }
4471
4472 ASSERT_TRUE(check);
4473 }
4474
4475 TEST_F(VulkanAPITest, DISABLED_log_softmax_underflow_exception) {
4476  // We apply softmax and log in sequence to the tensor [20, 0].
4477  // The output of softmax on CPU is [1.0000e+00, 2.0612e-09], while
4478  // the output on Vulkan is [1, 0] since 2.0612e-09 is smaller than
4479  // the smallest representable positive value 5.96e-8. We expect to see nan
4480 // or -inf when applying log.
4481 float data[] = {20, 0};
4482 const auto in_cpu = at::from_blob(data, {2}, at::kFloat);
4483 const auto in_vulkan = in_cpu.vulkan();
4484
4485 const auto softmax_out_cpu = at::softmax(in_cpu, 0);
4486 const auto softmax_out_vulkan = at::softmax(in_vulkan, 0);
4487
4488 const auto log_out_cpu = at::log(softmax_out_cpu);
4489 const auto log_out_vulkan = at::log(softmax_out_vulkan);
4490
4491 auto has_nan = log_out_vulkan.cpu().isnan().any().item().to<bool>();
4492 auto has_inf = log_out_vulkan.cpu().isinf().any().item().to<bool>();
4493
4494  // We expect the output of log to contain nan or inf.
4495 const auto check = has_nan || has_inf;
4496 if (!check) {
4497    std::cout << "expected log_out_vulkan to contain nan or inf, but got:" << std::endl;
4498 std::cout << log_out_vulkan.cpu() << std::endl;
4499 }
4500 ASSERT_TRUE(check);
4501 }
4502
4503 TEST_F(VulkanAPITest, log_softmax_underflow) {
4504  // The minimum strictly positive (subnormal) value of float16 on Vulkan is 2^-24 ≈ 5.96 × 10^-8.
4505  // https://en.wikipedia.org/wiki/Half-precision_floating-point_format#Exponent_encoding
4506  // so smallest_representable_log = log(5.96 × 10^-8) ≈ -16.64.
4507 // The implementation of `log_softmax` adds 6e-8 to the output of softmax before applying `log`
4508 // to deal with underflow, so there won't be nan or -inf as shown in the
4509 // `log_softmax_underflow_exception` test above
4510 float smallest_representable_log = -16.64f;
4511 float data[] = {20, 0};
4512 const auto in_cpu = at::from_blob(data, {2}, at::kFloat);
4513 const auto in_vulkan = in_cpu.vulkan();
4514
4515 const auto log_softmax_cpu = at::log_softmax(in_cpu, 0);
4516 const auto log_softmax_vulkan = at::log_softmax(in_vulkan, 0);
4517
4518 const auto check = checkRtol(log_softmax_cpu - log_softmax_vulkan.cpu(), -smallest_representable_log);
4519 if (!check) {
4520 showRtol(log_softmax_cpu, log_softmax_vulkan.cpu());
4521 }
4522 ASSERT_TRUE(check);
4523 }
4524
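// Helper: runs softmax (or log_softmax when requested) along every dimension,
// including negative dim indices, and checks CPU/Vulkan parity for each.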
4525 void test_softmax(const at::IntArrayRef shape, bool log_softmax = false) {
4526 at::Tensor in_cpu =
4527 at::rand(shape, at::TensorOptions(at::kCPU).dtype(at::kFloat));
4528 const at::Tensor in_vulkan = in_cpu.vulkan();
4529
4530 // Cast to signed to test negative index for dim
4531 int64_t size = static_cast<int64_t>(shape.size());
4532
4533 // Test on all dim
4534 for (auto dim = -size; dim < size; dim++) {
4535 const at::Tensor out_cpu =
4536 log_softmax ? at::log_softmax(in_cpu, dim) : at::softmax(in_cpu, dim);
4537
4538 const at::Tensor out_vulkan = log_softmax ? at::log_softmax(in_vulkan, dim)
4539 : at::softmax(in_vulkan, dim);
4540 const bool check = almostEqual(out_cpu, out_vulkan.cpu());
4541
4542 if (!check) {
4543      std::cout << "Softmax test failed on axis " << dim << " for tensor dims {";
4544 for (uint32_t place = 0; place < shape.size() - 1; place++) {
4545 std::cout << shape[place] << " ";
4546 }
4547 std::cout << shape.back() << "}" << std::endl;
4548 showRtol(out_cpu, out_vulkan.cpu());
4549 }
4550 ASSERT_TRUE(check);
4551 }
4552 }
4553
4554 TEST_F(VulkanAPITest, softmax) {
4555 c10::InferenceMode mode;
4556 std::vector<std::vector<int64_t>> test_in_dims = {
4557 {1, 3, 4, 2},
4558 {4, 8, 5, 7},
4559 {9, 11, 12, 12},
4560 };
4561 bool log_softmax = false;
4562 for (const std::vector<int64_t>& dim_vec : test_in_dims) {
4563 for (uint32_t trunc = 0; trunc < dim_vec.size(); trunc++) {
4564 const std::vector<int64_t> trunc_dim_vec =
4565 std::vector<int64_t>(dim_vec.begin(), dim_vec.end() - trunc);
4566 test_softmax(trunc_dim_vec, log_softmax);
4567 }
4568 }
4569 }
4570
4571 TEST_F(VulkanAPITest, DISABLED_log_softmax) {
4572 c10::InferenceMode mode;
4573 std::vector<std::vector<int64_t>> test_in_dims = {
4574 {1, 3, 4, 2},
4575 {4, 8, 5, 7},
4576 {9, 11, 12, 12},
4577 };
4578 bool log_softmax = true;
4579 for (const std::vector<int64_t>& dim_vec : test_in_dims) {
4580 for (uint32_t trunc = 0; trunc < dim_vec.size(); trunc++) {
4581 const std::vector<int64_t> trunc_dim_vec =
4582 std::vector<int64_t>(dim_vec.begin(), dim_vec.end() - trunc);
4583 test_softmax(trunc_dim_vec, log_softmax);
4584 }
4585 }
4586 }
4587
4588 TEST_F(VulkanAPITest, abs) {
4589 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
4590 const auto in_vulkan = in_cpu.vulkan();
4591
4592 const auto out_cpu = at::abs(in_cpu);
4593 const auto out_vulkan = at::abs(in_vulkan);
4594
4595 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4596 if (!check) {
4597 showRtol(out_cpu, out_vulkan.cpu());
4598 }
4599
4600 ASSERT_TRUE(check);
4601 }
4602
4603 TEST_F(VulkanAPITest, abs_) {
4604 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
4605 auto vulkan = cpu.vulkan();
4606
4607 at::abs_(cpu);
4608 at::abs_(vulkan);
4609
4610 const auto check = almostEqual(cpu, vulkan.cpu());
4611 if (!check) {
4612 showRtol(cpu, vulkan.cpu());
4613 }
4614
4615 ASSERT_TRUE(check);
4616 }
4617
4618 TEST_F(VulkanAPITest, tanh) {
4619 const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
4620 const auto in_vulkan = in_cpu.vulkan();
4621
4622 const auto out_cpu = at::tanh(in_cpu);
4623 const auto out_vulkan = at::tanh(in_vulkan);
4624
4625 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4626 if (!check) {
4627 showRtol(out_cpu, out_vulkan.cpu());
4628 }
4629
4630 ASSERT_TRUE(check);
4631 }
4632
4633 TEST_F(VulkanAPITest, tanh_) {
4634 auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
4635 auto vulkan = cpu.vulkan();
4636
4637 at::tanh_(cpu);
4638 at::tanh_(vulkan);
4639
4640 const auto check = almostEqual(cpu, vulkan.cpu());
4641 if (!check) {
4642 showRtol(cpu, vulkan.cpu());
4643 }
4644
4645 ASSERT_TRUE(check);
4646 }
4647
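// Helper: checks at::sub(input, other, alpha) parity between the CPU and
// Vulkan backends; the tests below cover broadcasting on either operand.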
4648 void test_sub(const at::IntArrayRef input_shape, const at::IntArrayRef other_shape, float alpha) {
4649 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
4650 const auto other_cpu = at::rand(other_shape, at::device(at::kCPU).dtype(at::kFloat));
4651
4652 const auto in_vulkan = in_cpu.vulkan();
4653 const auto other_vulkan = other_cpu.vulkan();
4654
4655 const auto out_cpu = at::sub(in_cpu, other_cpu, alpha);
4656 const auto out_vulkan = at::sub(in_vulkan, other_vulkan, alpha);
4657
4658 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
4659 if (!check) {
4660 showRtol(out_cpu, out_vulkan.cpu());
4661 }
4662
4663 ASSERT_TRUE(check);
4664 }
4665
4666 TEST_F(VulkanAPITest, sub) {
4667 test_sub({11, 7, 139, 109}, {11, 7, 139, 109}, 2.1f);
4668 }
4669
4670 TEST_F(VulkanAPITest, sub_broadcast0) {
4671 test_sub({3, 5, 179, 221}, {3, 5, 1, 1}, 1.8f);
4672 }
4673
4674 TEST_F(VulkanAPITest, sub_broadcast1) {
4675 test_sub({3, 5, 179, 221}, {3, 5, 1, 221}, 1.8f);
4676 }
4677
4678 TEST_F(VulkanAPITest, sub_broadcast2) {
4679 test_sub({3, 4, 179, 221}, {4, 1, 1}, 2.5f);
4680 }
4681
4682 TEST_F(VulkanAPITest, sub_broadcast3) {
4683 test_sub({3, 4, 179, 221}, {1, 1, 179, 221}, 2.5f);
4684 }
4685
4686 TEST_F(VulkanAPITest, sub_broadcast4) {
4687 test_sub({3, 4, 179, 1}, {1, 179, 221}, 2.5f);
4688 }
4689
4690 TEST_F(VulkanAPITest, sub_broadcast5) {
4691 test_sub({2, 1, 7, 1}, {1, 5, 1, 4}, 1.2f);
4692 }
4693
4694 TEST_F(VulkanAPITest, sub_broadcast6) {
4695 test_sub({1, 15, 5, 4}, {21, 1, 5, 4}, 1.8f);
4696 }
4697
4698 TEST_F(VulkanAPITest, sub_zero_dim) {
4699 test_sub({1, 15, 5, 4}, {}, 1.8f);
4700 }
4701
4702 TEST_F(VulkanAPITest, sub_) {
4703 auto a_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
4704 auto a_vulkan = a_cpu.vulkan();
4705
4706 const auto b_cpu = at::rand({61, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
4707 const auto b_vulkan = b_cpu.vulkan();
4708
4709 a_cpu.sub_(b_cpu, 2.1f);
4710 a_vulkan.sub_(b_vulkan, 2.1f);
4711
4712 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4713 if (!check) {
4714  showRtol(a_cpu, a_vulkan.cpu());
4715 }
4716
4717 ASSERT_TRUE(check);
4718 }
4719
4720 TEST_F(VulkanAPITest, sub_broadcast0_) {
4721 auto a_cpu = at::rand({16, 17, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
4722 auto a_vulkan = a_cpu.vulkan();
4723
4724 const auto b_cpu = at::rand({16, 17, 29, 1}, at::device(at::kCPU).dtype(at::kFloat));
4725 const auto b_vulkan = b_cpu.vulkan();
4726
4727 a_cpu.sub_(b_cpu, 2.1f);
4728 a_vulkan.sub_(b_vulkan, 2.1f);
4729
4730 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4731 if (!check) {
4732  showRtol(a_cpu, a_vulkan.cpu());
4733 }
4734
4735 ASSERT_TRUE(check);
4736 }
4737
4738 TEST_F(VulkanAPITest, sub_broadcast1_) {
4739 auto a_cpu = at::rand({3, 8, 29, 83}, at::device(at::kCPU).dtype(at::kFloat));
4740 auto a_vulkan = a_cpu.vulkan();
4741
4742 const auto b_cpu = at::rand({3, 8, 1, 1}, at::device(at::kCPU).dtype(at::kFloat));
4743 const auto b_vulkan = b_cpu.vulkan();
4744
4745 a_cpu.sub_(b_cpu, 2.1f);
4746 a_vulkan.sub_(b_vulkan, 2.1f);
4747
4748 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4749 if (!check) {
4750  showRtol(a_cpu, a_vulkan.cpu());
4751 }
4752
4753 ASSERT_TRUE(check);
4754 }
4755
4756 TEST_F(VulkanAPITest, sub_scalar) {
4757 if (!at::is_vulkan_available()) {
4758 return;
4759 }
4760
4761 const auto a_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
4762 const auto a_vulkan = a_cpu.vulkan();
4763
4764 const float b_scalar = 3.1415f;
4765
4766 const auto c_cpu = at::sub(a_cpu, b_scalar, 2.1f);
4767 const auto c_vulkan = at::sub(a_vulkan, b_scalar, 2.1f);
4768
4769 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
4770 if (!check) {
4771 showRtol(c_cpu, c_vulkan.cpu());
4772 }
4773
4774 ASSERT_TRUE(check);
4775 }
4776
4777 TEST_F(VulkanAPITest, sub_scalar_) {
4778 if (!at::is_vulkan_available()) {
4779 return;
4780 }
4781
4782 auto a_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
4783 auto a_vulkan = a_cpu.vulkan();
4784
4785 const float b_scalar = 3.1415f;
4786
4787 a_cpu.sub_(b_scalar, 2.1f);
4788 a_vulkan.sub_(b_scalar, 2.1f);
4789
4790 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4791 if (!check) {
4792 showRtol(a_cpu, a_vulkan.cpu());
4793 }
4794
4795 ASSERT_TRUE(check);
4796 }
4797
4798 TEST_F(VulkanAPITest, sub_scalar_wrapped) {
4799 if (!at::is_vulkan_available()) {
4800 return;
4801 }
4802
4803 const auto a_cpu = at::rand({13, 23, 59, 73}, at::device(at::kCPU).dtype(at::kFloat));
4804 const auto a_vulkan = a_cpu.vulkan();
4805
4806 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
4807
4808 const auto c_cpu = at::sub(a_cpu, b_scalar, 2.1f);
4809 const auto c_vulkan = at::sub(a_vulkan, b_scalar, 2.1f);
4810
4811 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
4812 if (!check) {
4813 showRtol(c_cpu, c_vulkan.cpu());
4814 }
4815
4816 ASSERT_TRUE(check);
4817 }
4818
4819 TEST_F(VulkanAPITest, sub_scalar_wrapped_) {
4820 if (!at::is_vulkan_available()) {
4821 return;
4822 }
4823
4824 auto a_cpu = at::rand({47, 2, 23, 97}, at::device(at::kCPU).dtype(at::kFloat));
4825 auto a_vulkan = a_cpu.vulkan();
4826
4827 const auto b_scalar = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
4828
4829 a_cpu.sub_(b_scalar, 2.1f);
4830 a_vulkan.sub_(b_scalar, 2.1f);
4831
4832 const auto check = almostEqual(a_cpu, a_vulkan.cpu());
4833 if (!check) {
4834 showRtol(a_cpu, a_vulkan.cpu());
4835 }
4836
4837 ASSERT_TRUE(check);
4838 }
4839
TEST_F(VulkanAPITest,sub_to_scalar_wrapped)4840 TEST_F(VulkanAPITest, sub_to_scalar_wrapped) {
4841 if (!at::is_vulkan_available()) {
4842 return;
4843 }
4844
4845 const auto a = at::rand({1}, at::device(at::kCPU).dtype(at::kFloat));
4846
4847 const auto b_cpu = at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
4848 const auto b_vulkan = b_cpu.vulkan();
4849
4850 const auto c_cpu = at::sub(a, b_cpu, 2.1f);
4851 const auto c_vulkan = at::sub(a, b_vulkan, 2.1f);
4852
4853 const auto check = almostEqual(c_cpu, c_vulkan.cpu());
4854 if (!check) {
4855 showRtol(c_cpu, c_vulkan.cpu());
4856 }
4857
4858 ASSERT_TRUE(check);
4859 }
4860
TEST_F(VulkanAPITest, sum_invalid_inputs) {
  c10::InferenceMode mode;

  // Act: input dimension too large
  EXPECT_THROW({
    at::sum(at::rand({3, 5, 7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
        .vulkan(), {3});
  }, ::std::exception);

  // Act: dimension out of range
  EXPECT_THROW({
    at::sum(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
        .vulkan(), {3});
  }, ::std::exception);

  // Act: dimension out of range
  EXPECT_THROW({
    at::sum(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
        .vulkan(), {-4});
  }, ::std::exception);

  // Act: repeated dimensions
  EXPECT_THROW({
    at::sum(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
        .vulkan(), {1, 1});
  }, ::std::exception);

  // Act: repeated dimensions
  EXPECT_THROW({
    at::sum(at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
        .vulkan(), {1, -2});
  }, ::std::exception);
}

void test_sum_dim(const at::IntArrayRef input_shape, const at::IntArrayRef dim_list, bool keepdim=false) {
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_vulkan = in_cpu.vulkan();

  const auto out_cpu = at::sum(in_cpu, dim_list, keepdim);
  const auto out_vulkan = at::sum(in_vulkan, dim_list, keepdim);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    std::cout << "sum_dim test failed with input shape: "
              << input_shape << " and dim_list: " << dim_list << std::endl;
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
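
// Shape reference for the dim-list tests below (illustrative note): summing a
// {10, 7, 5} tensor over dims {0, 2} yields shape {7} with keepdim == false,
// and {1, 7, 1} with keepdim == true.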
4911
TEST_F(VulkanAPITest,sum_dim_1d)4912 TEST_F(VulkanAPITest, sum_dim_1d) {
4913 test_sum_dim({7}, {-1});
4914 test_sum_dim({3}, {0});
4915 }
4916
TEST_F(VulkanAPITest,sum_dim_2d)4917 TEST_F(VulkanAPITest, sum_dim_2d) {
4918 test_sum_dim({2, 3}, {-1});
4919 test_sum_dim({2, 7}, {-2});
4920 test_sum_dim({2, 7}, {-1, -2});
4921 }
4922
TEST_F(VulkanAPITest,sum_dim_3d)4923 TEST_F(VulkanAPITest, sum_dim_3d) {
4924 test_sum_dim({9, 7, 5}, {-1});
4925 test_sum_dim({5, 7, 9}, {-2});
4926 test_sum_dim({5, 7, 9}, {-3});
4927
4928 test_sum_dim({10, 7, 5}, {0, 1});
4929 test_sum_dim({10, 7, 5}, {0, 2});
4930 test_sum_dim({10, 7, 5}, {1, 2});
4931
4932 test_sum_dim({10, 7, 5}, {-1, -2});
4933 test_sum_dim({10, 7, 5}, {-1, -3});
4934 test_sum_dim({10, 7, 5}, {-2, -3});
4935
4936 test_sum_dim({10, 7, 5}, {0, 1, 2});
4937 test_sum_dim({10, 7, 5}, {-1, -2, -3});
4938 }
4939
TEST_F(VulkanAPITest,sum_dim_4d)4940 TEST_F(VulkanAPITest, sum_dim_4d) {
4941 test_sum_dim({7, 9, 6, 5}, {-1});
4942 test_sum_dim({6, 5, 7, 9}, {-2});
4943 test_sum_dim({6, 5, 7, 9}, {-3});
4944 test_sum_dim({6, 5, 7, 9}, {-4});
4945
4946 test_sum_dim({10, 7, 5, 6}, {0, 1});
4947 test_sum_dim({10, 7, 5, 6}, {0, 2});
4948 test_sum_dim({10, 7, 5, 6}, {0, 3});
4949 test_sum_dim({10, 7, 5, 6}, {1, 2});
4950 test_sum_dim({10, 7, 5, 6}, {1, 3});
4951 test_sum_dim({10, 7, 5, 6}, {2, 3});
4952 test_sum_dim({10, 7, 5, 6}, {-2, -4});
4953
4954 test_sum_dim({10, 7, 5, 6}, {0, 1, 2});
4955 test_sum_dim({10, 7, 5, 6}, {0, 1, 3});
4956 test_sum_dim({10, 7, 5, 6}, {0, 2, 3});
4957 test_sum_dim({10, 7, 5, 6}, {3, 2, 1});
4958 test_sum_dim({10, 7, 5, 6}, {3, -2, 1});
4959 test_sum_dim({10, 7, 5, 6}, {-3, -2, -1});
4960
4961 test_sum_dim({10, 7, 5, 6}, {-1, -2, -3});
4962 test_sum_dim({10, 7, 5, 6}, {-1, -2, -4});
4963 test_sum_dim({10, 7, 5, 6}, {-1, -3, -4});
4964 test_sum_dim({10, 7, 5, 6}, {-2, -3, -4});
4965
4966 test_sum_dim({10, 7, 5, 6}, {-1, -2, -3, -4});
4967 }
4968
TEST_F(VulkanAPITest,sum_dim_keepdim_1d)4969 TEST_F(VulkanAPITest, sum_dim_keepdim_1d) {
4970 test_sum_dim({5}, {-1}, true);
4971 test_sum_dim({3}, {-1}, true);
4972 }
4973
TEST_F(VulkanAPITest,sum_dim_keepdim_2d)4974 TEST_F(VulkanAPITest, sum_dim_keepdim_2d) {
4975 test_sum_dim({5, 7}, {-1}, true);
4976 test_sum_dim({5, 7}, {-2}, true);
4977 }
4978
TEST_F(VulkanAPITest,sum_dim_keepdim_3d)4979 TEST_F(VulkanAPITest, sum_dim_keepdim_3d) {
4980 test_sum_dim({9, 5, 7}, {-1}, true);
4981 test_sum_dim({5, 9, 7}, {-2}, true);
4982 test_sum_dim({7, 9, 5}, {-3}, true);
4983
4984 test_sum_dim({9, 5, 7}, {0, 1}, true);
4985 test_sum_dim({5, 9, 7}, {0, 2}, true);
4986 test_sum_dim({7, 9, 5}, {1, 2}, true);
4987
4988 test_sum_dim({7, 9, 5}, {0, 1, 2}, true);
4989 }
4990
TEST_F(VulkanAPITest,sum_dim_keepdim_4d)4991 TEST_F(VulkanAPITest, sum_dim_keepdim_4d) {
4992 test_sum_dim({9, 5, 7, 11}, {-1}, true);
4993 test_sum_dim({5, 9, 11, 7}, {-2}, true);
4994 test_sum_dim({7, 11, 9, 5}, {-3}, true);
4995 test_sum_dim({11, 7, 9, 5}, {-4}, true);
4996
4997 test_sum_dim({9, 5, 7, 11}, {0, 1}, true);
4998 test_sum_dim({5, 9, 11, 7}, {0, 2}, true);
4999 test_sum_dim({7, 11, 9, 5}, {0, 3}, true);
5000 test_sum_dim({11, 7, 9, 5}, {1, 2}, true);
5001 test_sum_dim({9, 5, 7, 11}, {1, 3}, true);
5002 test_sum_dim({5, 9, 11, 7}, {2, 3}, true);
5003
5004 test_sum_dim({7, 11, 9, 5}, {-1, -2, -3}, true);
5005 test_sum_dim({11, 7, 9, 5}, {-1, -2, -4}, true);
5006 test_sum_dim({9, 5, 7, 11}, {-2, -3, -4}, true);
5007
5008 test_sum_dim({9, 5, 7, 11}, {-1, -2, -3, -4}, true);
5009 }
5010
void test_sum(const at::IntArrayRef input_shape) {
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_vulkan = in_cpu.vulkan();

  const auto out_cpu = at::sum(in_cpu);
  const auto out_vulkan = at::sum(in_vulkan);

  ASSERT_TRUE(out_vulkan.dim() == 0);
  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    std::cout << "sum test failed with input shape: "
              << input_shape << std::endl;
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, sum_test) {
  test_sum({6});
  test_sum({5, 6});
  test_sum({0, 3, 1});
  test_sum({5, 0, 1});
  test_sum({5, 3, 0});
  test_sum({3, 3, 1});
  test_sum({7, 6, 6});
  test_sum({7, 8, 5, 6});
}
5039
5040
void test_uniform(at::Tensor a_vulkan, const float a_min, const float a_max) {
  auto a_cpu = a_vulkan.cpu();
  ASSERT_TRUE(a_cpu.max().item<float>() <= a_max);
  ASSERT_TRUE(a_cpu.min().item<float>() >= a_min);

  // In addition to verifying the range, perform a loose check on the histogram
  // of the distribution.
  float b_min = 0.0f;
  float b_max = 10.0f;

  auto b_vulkan =
      at::rand({80, 7, 12, 10}, at::device(at::kCPU).dtype(at::kFloat))
          .vulkan();
  b_vulkan.uniform_(b_min, b_max);
  auto b_cpu = b_vulkan.cpu();

  int bins = 10;
  auto b_hist_tuple = at::histogram(b_cpu, bins);

  int64_t expected_per_bin = b_vulkan.numel() / bins;
  auto b_hist = std::get<0>(b_hist_tuple);

  // Very relaxed definition of uniform: pass if every bin is within 5% of the
  // expected count.
  ASSERT_TRUE(
      (b_hist - expected_per_bin).abs().max().item<float>() <=
      (expected_per_bin * 0.05));
}
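
// Note: the per-bin tolerance above is a loose statistical check. With only a
// bin count, at::histogram spans the sample's min/max, which for a large
// uniform sample should closely match [b_min, b_max), so numel / bins is a
// reasonable expected count per bin.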

TEST_F(VulkanAPITest, uniform) {
  float a_min = -8.2f;
  float a_max = -1.4f;
  auto a_vulkan =
      at::rand({8, 7, 12, 10}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  a_vulkan.uniform_(a_min, a_max);
  test_uniform(a_vulkan, a_min, a_max);
}

TEST_F(VulkanAPITest, rand_like) {
  float a_min = 0.0f;
  float a_max = 1.0f;
  auto a_vulkan =
      at::zeros({8, 7, 12, 10}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  const auto out_vulkan = at::rand_like(a_vulkan);
  // Verify that the input is still all zeros (i.e. rand_like is not in-place).
  ASSERT_TRUE(at::mean(a_vulkan.cpu()).item<float>() == 0.0);
  test_uniform(out_vulkan, a_min, a_max);
}

void test_normal(at::Tensor out_vulkan, const float mean, const float std) {
  // Verify the distribution is roughly normal: the generated mean should differ
  // from the requested mean by less than 5% of the standard deviation, and the
  // generated standard deviation should differ from the requested one by the
  // same margin.
  ASSERT_TRUE(std::abs(at::mean(out_vulkan.cpu()).item<float>() - mean) < std::abs(std) * 0.05);
  ASSERT_TRUE(std::abs(at::std(out_vulkan.cpu()).item<float>() - std) < std::abs(std) * 0.05);
}
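
// Note: the 5% criterion above is a sanity check that presumes a reasonably
// large sample; for very small tensors the empirical mean/std may not converge
// that tightly.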

TEST_F(VulkanAPITest, normal_) {
  float a_mean = -10.0;
  float a_std = 2.0;

  auto a_vulkan =
      at::zeros({3, 4, 5, 6}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  a_vulkan.normal_(a_mean, a_std);

  test_normal(a_vulkan, a_mean, a_std);
}

TEST_F(VulkanAPITest, normal_large) {
  float a_mean = 1.0;
  float a_std = 0.01;

  auto a_vulkan =
      at::zeros({30, 40, 50, 60}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  a_vulkan.normal_(a_mean, a_std);

  test_normal(a_vulkan, a_mean, a_std);
}

TEST_F(VulkanAPITest, normal_error) {
  float a_mean = 1.0;
  float a_std = -1;

  auto a_vulkan =
      at::zeros({30, 40, 50, 60}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  EXPECT_THROW(a_vulkan.normal_(a_mean, a_std), ::std::exception);
}

TEST_F(VulkanAPITest, randn_like) {
  float a_mean = 0.0;
  float a_std = 1.0;

  auto a_vulkan =
      at::zeros({8, 7, 6, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  const auto out_vulkan = at::randn_like(a_vulkan);
  // Verify that the input is still all zeros (i.e. randn_like is not in-place).
  ASSERT_TRUE(at::mean(a_vulkan.cpu()).item<float>() == 0.0);
  test_normal(out_vulkan, a_mean, a_std);
}

TEST_F(VulkanAPITest, randn_like_large) {
  float a_mean = 0.0;
  float a_std = 1.0;

  auto a_vulkan =
      at::zeros({80, 70, 60, 50}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  const auto out_vulkan = at::randn_like(a_vulkan);

  test_normal(out_vulkan, a_mean, a_std);
}
5148
void test_t(const at::IntArrayRef input_shape) {
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::t(in_cpu);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::t(in_vulkan);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
5163
TEST_F(VulkanAPITest,transpose_t_1d)5164 TEST_F(VulkanAPITest, transpose_t_1d) {
5165 test_t({7});
5166 }
5167
TEST_F(VulkanAPITest,transpose_t_2d_small)5168 TEST_F(VulkanAPITest, transpose_t_2d_small) {
5169 test_t({1, 1});
5170 }
5171
TEST_F(VulkanAPITest,transpose_t_2d_medium)5172 TEST_F(VulkanAPITest, transpose_t_2d_medium) {
5173 test_t({7, 5});
5174 }
5175
TEST_F(VulkanAPITest,transpose_t_2d_large)5176 TEST_F(VulkanAPITest, transpose_t_2d_large) {
5177 test_t({53, 117});
5178 }
5179
void test_transpose(const at::IntArrayRef input_shape, int64_t index0, int64_t index1) {
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::transpose(in_cpu, index0, index1);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::transpose(in_vulkan, index0, index1);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
5194
TEST_F(VulkanAPITest,transpose_2d_height_and_width_small)5195 TEST_F(VulkanAPITest, transpose_2d_height_and_width_small) {
5196 test_transpose({1, 1}, 0, 1);
5197 }
5198
TEST_F(VulkanAPITest,transpose_2d_height_and_width_medium)5199 TEST_F(VulkanAPITest, transpose_2d_height_and_width_medium) {
5200 test_transpose({7, 5}, 0, 1);
5201 }
5202
TEST_F(VulkanAPITest,transpose_2d_height_and_width_large)5203 TEST_F(VulkanAPITest, transpose_2d_height_and_width_large) {
5204 test_transpose({53, 117}, 0, 1);
5205 }
5206
TEST_F(VulkanAPITest,transpose_2d_height_and_height_large)5207 TEST_F(VulkanAPITest, transpose_2d_height_and_height_large) {
5208 test_transpose({53, 117}, 0, 0);
5209 }
5210
TEST_F(VulkanAPITest,transpose_2d_width_and_width_large)5211 TEST_F(VulkanAPITest, transpose_2d_width_and_width_large) {
5212 test_transpose({53, 117}, 1, 1);
5213 }
5214
TEST_F(VulkanAPITest,transpose_3d_height_and_width_small)5215 TEST_F(VulkanAPITest, transpose_3d_height_and_width_small) {
5216 test_transpose({1, 1, 1}, 1, 2);
5217 }
5218
TEST_F(VulkanAPITest,transpose_3d_height_and_width_medium)5219 TEST_F(VulkanAPITest, transpose_3d_height_and_width_medium) {
5220 test_transpose({3, 2, 5}, 1, 2);
5221 }
5222
TEST_F(VulkanAPITest,transpose_3d_height_and_width_large)5223 TEST_F(VulkanAPITest, transpose_3d_height_and_width_large) {
5224 test_transpose({100, 1, 144}, 1, 2);
5225 }
5226
TEST_F(VulkanAPITest,transpose_3d_width_and_width_large)5227 TEST_F(VulkanAPITest, transpose_3d_width_and_width_large) {
5228 test_transpose({100, 1, 144}, 2, 2);
5229 }
5230
TEST_F(VulkanAPITest,transpose_3d_depth_and_width_small)5231 TEST_F(VulkanAPITest, transpose_3d_depth_and_width_small) {
5232 test_transpose({1, 1, 1}, 0, 2);
5233 }
5234
TEST_F(VulkanAPITest,transpose_3d_depth_and_width_medium)5235 TEST_F(VulkanAPITest, transpose_3d_depth_and_width_medium) {
5236 test_transpose({3, 2, 5}, 0, 2);
5237 }
5238
TEST_F(VulkanAPITest,transpose_3d_depth_and_width_large)5239 TEST_F(VulkanAPITest, transpose_3d_depth_and_width_large) {
5240 test_transpose({113, 1, 141}, 0, 2);
5241 }
5242
TEST_F(VulkanAPITest,transpose_3d_depth_and_depth_large)5243 TEST_F(VulkanAPITest, transpose_3d_depth_and_depth_large) {
5244 test_transpose({113, 2, 131}, 0, 0);
5245 }
5246
TEST_F(VulkanAPITest,transpose_3d_depth_and_height_small)5247 TEST_F(VulkanAPITest, transpose_3d_depth_and_height_small) {
5248 test_transpose({1, 1, 1}, 0, 1);
5249 }
5250
TEST_F(VulkanAPITest,transpose_3d_depth_and_height_medium)5251 TEST_F(VulkanAPITest, transpose_3d_depth_and_height_medium) {
5252 test_transpose({3, 7, 5}, 0, 1);
5253 }
5254
TEST_F(VulkanAPITest,transpose_3d_depth_and_height_large)5255 TEST_F(VulkanAPITest, transpose_3d_depth_and_height_large) {
5256 test_transpose({113, 141, 1}, 0, 1);
5257 }
5258
TEST_F(VulkanAPITest,transpose_3d_height_and_height_large)5259 TEST_F(VulkanAPITest, transpose_3d_height_and_height_large) {
5260 test_transpose({101, 1, 141}, 1, 1);
5261 }
5262
TEST_F(VulkanAPITest,transpose_4d_batch_and_batch_large)5263 TEST_F(VulkanAPITest, transpose_4d_batch_and_batch_large) {
5264 test_transpose({7, 51, 41, 3}, 0, 0);
5265 }
5266
TEST_F(VulkanAPITest,transpose_4d_depth_and_depth_large)5267 TEST_F(VulkanAPITest, transpose_4d_depth_and_depth_large) {
5268 test_transpose({7, 51, 41, 3}, 1, 1);
5269 }
5270
TEST_F(VulkanAPITest,transpose_4d_height_and_height_large)5271 TEST_F(VulkanAPITest, transpose_4d_height_and_height_large) {
5272 test_transpose({7, 51, 41, 3}, 2, 2);
5273 }
5274
TEST_F(VulkanAPITest,transpose_4d_width_and_width_large)5275 TEST_F(VulkanAPITest, transpose_4d_width_and_width_large) {
5276 test_transpose({7, 51, 41, 3}, 3, 3);
5277 }
5278
TEST_F(VulkanAPITest,transpose_4d_batch_and_depth_large)5279 TEST_F(VulkanAPITest, transpose_4d_batch_and_depth_large) {
5280 test_transpose({7, 51, 41, 3}, 0, 1);
5281 }
5282
TEST_F(VulkanAPITest,transpose_4d_batch_and_height_large)5283 TEST_F(VulkanAPITest, transpose_4d_batch_and_height_large) {
5284 test_transpose({7, 51, 41, 3}, 0, 2);
5285 }
5286
TEST_F(VulkanAPITest,transpose_4d_batch_and_width_large)5287 TEST_F(VulkanAPITest, transpose_4d_batch_and_width_large) {
5288 test_transpose({7, 51, 41, 3}, 0, 3);
5289 }
5290
TEST_F(VulkanAPITest,transpose_4d_depth_and_height_large)5291 TEST_F(VulkanAPITest, transpose_4d_depth_and_height_large) {
5292 test_transpose({7, 51, 41, 3}, 1, 2);
5293 }
5294
TEST_F(VulkanAPITest,transpose_4d_depth_and_width_large)5295 TEST_F(VulkanAPITest, transpose_4d_depth_and_width_large) {
5296 test_transpose({7, 51, 41, 3}, 1, 3);
5297 }
5298
TEST_F(VulkanAPITest,transpose_4d_height_and_width_large)5299 TEST_F(VulkanAPITest, transpose_4d_height_and_width_large) {
5300 test_transpose({7, 51, 41, 3}, 2, 3);
5301 }
5302
// Test Unary Ops
void test_exp(const at::IntArrayRef input_shape) {
  c10::InferenceMode mode;
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::exp(in_cpu);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::exp(in_vulkan);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
    std::cout << "exp test failed with input shape: "
              << input_shape << std::endl;
  }
  ASSERT_TRUE(check);
}
5320
TEST_F(VulkanAPITest,unary_op_exp)5321 TEST_F(VulkanAPITest, unary_op_exp) {
5322 test_exp({5});
5323 test_exp({5, 6});
5324 test_exp({7, 3, 5});
5325 test_exp({11, 1, 4, 2});
5326 }
5327
test_exp_(const at::IntArrayRef input_shape)5328 void test_exp_(const at::IntArrayRef input_shape) {
5329 c10::InferenceMode mode;
5330 const auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
5331 const auto vulkan = cpu.vulkan();
5332
5333 cpu.exp_();
5334 vulkan.exp_();
5335
5336 const auto check = almostEqual(cpu, vulkan.cpu());
5337 if (!check) {
5338 showRtol(cpu, vulkan.cpu());
5339 std::cout << "exp_ test failed with input shape: "
5340 << input_shape << std::endl;
5341 }
5342 ASSERT_TRUE(check);
5343 }
5344
TEST_F(VulkanAPITest,unary_op_exp_)5345 TEST_F(VulkanAPITest, unary_op_exp_) {
5346 test_exp_({5});
5347 test_exp_({5, 6});
5348 test_exp_({7, 3, 5});
5349 test_exp_({11, 1, 4, 2});
5350 }
5351
test_sqrt(const at::IntArrayRef input_shape)5352 void test_sqrt(const at::IntArrayRef input_shape) {
5353 c10::InferenceMode mode;
5354 const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
5355 const auto out_cpu = at::sqrt(in_cpu);
5356
5357 const auto in_vulkan = in_cpu.vulkan();
5358 const auto out_vulkan = at::sqrt(in_vulkan);
5359
5360 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5361 if (!check) {
5362 showRtol(out_cpu, out_vulkan.cpu());
5363 std::cout << "sqrt test failed with input shape: "
5364 << input_shape << std::endl;
5365 }
5366 ASSERT_TRUE(check);
5367 }
5368
TEST_F(VulkanAPITest,unary_op_sqrt)5369 TEST_F(VulkanAPITest, unary_op_sqrt) {
5370 test_sqrt({5});
5371 test_sqrt({5, 6});
5372 test_sqrt({7, 3, 5});
5373 test_sqrt({11, 1, 4, 2});
5374 }
5375
test_sqrt_(const at::IntArrayRef input_shape)5376 void test_sqrt_(const at::IntArrayRef input_shape) {
5377 c10::InferenceMode mode;
5378 const auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
5379 const auto vulkan = cpu.vulkan();
5380
5381 cpu.sqrt_();
5382 vulkan.sqrt_();
5383
5384 const auto check = almostEqual(cpu, vulkan.cpu());
5385 if (!check) {
5386 showRtol(cpu, vulkan.cpu());
5387 std::cout << "sqrt_ test failed with input shape: "
5388 << input_shape << std::endl;
5389 }
5390 ASSERT_TRUE(check);
5391 }
5392
TEST_F(VulkanAPITest,unary_op_sqrt_)5393 TEST_F(VulkanAPITest, unary_op_sqrt_) {
5394 test_sqrt_({5});
5395 test_sqrt_({5, 6});
5396 test_sqrt_({7, 3, 5});
5397 test_sqrt_({11, 1, 4, 2});
5398 }
5399
test_log(const at::IntArrayRef input_shape)5400 void test_log(const at::IntArrayRef input_shape) {
5401 c10::InferenceMode mode;
5402 // Need to add a very small constant to avoid 0.
5403 const auto in_cpu =
5404 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.0001;
5405 const auto out_cpu = at::log(in_cpu);
5406
5407 const auto in_vulkan = in_cpu.vulkan();
5408 const auto out_vulkan = at::log(in_vulkan);
5409
5410 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5411 if (!check) {
5412 showRtol(out_cpu, out_vulkan.cpu());
5413 std::cout << "log test failed with input shape: " << input_shape
5414 << std::endl;
5415 }
5416 ASSERT_TRUE(check);
5417 }
5418
TEST_F(VulkanAPITest,unary_op_log)5419 TEST_F(VulkanAPITest, unary_op_log) {
5420 test_log({5});
5421 test_log({5, 6});
5422 test_log({7, 3, 5});
5423 test_log({11, 1, 4, 2});
5424 }
5425
test_log_(const at::IntArrayRef input_shape)5426 void test_log_(const at::IntArrayRef input_shape) {
5427 c10::InferenceMode mode;
5428 // Need to add a very small constant to avoid 0.
5429 const auto cpu =
5430 at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.0001;
5431 const auto vulkan = cpu.vulkan();
5432
5433 cpu.log_();
5434 vulkan.log_();
5435
5436 const auto check = almostEqual(cpu, vulkan.cpu());
5437 if (!check) {
5438 showRtol(cpu, vulkan.cpu());
5439 std::cout << "log_ test failed with input shape: " << input_shape
5440 << std::endl;
5441 }
5442 ASSERT_TRUE(check);
5443 }
5444
TEST_F(VulkanAPITest,unary_op_log_)5445 TEST_F(VulkanAPITest, unary_op_log_) {
5446 test_log_({5});
5447 test_log_({5, 6});
5448 test_log_({7, 3, 5});
5449 test_log_({11, 1, 4, 2});
5450 }
5451
void test_unsqueeze(const at::IntArrayRef input_shape, int64_t dim) {
  c10::InferenceMode mode;
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::unsqueeze(in_cpu, dim);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::unsqueeze(in_vulkan, dim);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
    std::cout << "unsqueeze test failed with input shape: "
              << input_shape << std::endl;
  }
  ASSERT_TRUE(check);
}
5468
TEST_F(VulkanAPITest,unsqueeze_0dto1d_dim0)5469 TEST_F(VulkanAPITest, unsqueeze_0dto1d_dim0) {
5470 test_unsqueeze({}, 0);
5471 test_unsqueeze({}, -1);
5472 }
5473
TEST_F(VulkanAPITest,unsqueeze_1dto2d_dim0)5474 TEST_F(VulkanAPITest, unsqueeze_1dto2d_dim0) {
5475 test_unsqueeze({5}, 0);
5476 test_unsqueeze({6}, -2);
5477 test_unsqueeze({111}, 0);
5478 test_unsqueeze({112}, -2);
5479 }
5480
TEST_F(VulkanAPITest,unsqueeze_1dto2d_dim1)5481 TEST_F(VulkanAPITest, unsqueeze_1dto2d_dim1) {
5482 test_unsqueeze({5}, 1);
5483 test_unsqueeze({6}, -1);
5484 test_unsqueeze({111}, 1);
5485 test_unsqueeze({112}, -1);
5486 }
5487
TEST_F(VulkanAPITest,unsqueeze_2dto3d_dim0)5488 TEST_F(VulkanAPITest, unsqueeze_2dto3d_dim0) {
5489 test_unsqueeze({1, 5}, 2);
5490 test_unsqueeze({5, 7}, 0);
5491 test_unsqueeze({7, 5}, -3);
5492 test_unsqueeze({111, 222}, 0);
5493 test_unsqueeze({222, 111}, -3);
5494 }
5495
TEST_F(VulkanAPITest,unsqueeze_2dto3d_dim1)5496 TEST_F(VulkanAPITest, unsqueeze_2dto3d_dim1) {
5497 test_unsqueeze({5, 7}, 1);
5498 test_unsqueeze({7, 5}, -2);
5499 test_unsqueeze({111, 222}, 1);
5500 test_unsqueeze({222, 111}, -2);
5501 }
5502
TEST_F(VulkanAPITest,unsqueeze_2dto3d_dim2)5503 TEST_F(VulkanAPITest, unsqueeze_2dto3d_dim2) {
5504 test_unsqueeze({5, 7}, 2);
5505 test_unsqueeze({7, 5}, -1);
5506 test_unsqueeze({111, 222}, 2);
5507 test_unsqueeze({222, 111}, -1);
5508 }
5509
TEST_F(VulkanAPITest,unsqueeze_3dto4d_dim0)5510 TEST_F(VulkanAPITest, unsqueeze_3dto4d_dim0) {
5511 test_unsqueeze({2, 3, 4}, 0);
5512 test_unsqueeze({4, 3, 2}, -4);
5513 test_unsqueeze({22, 33, 11}, 0);
5514 test_unsqueeze({33, 11, 22}, -4);
5515 }
5516
TEST_F(VulkanAPITest,unsqueeze_3dto4d_dim1)5517 TEST_F(VulkanAPITest, unsqueeze_3dto4d_dim1) {
5518 test_unsqueeze({2, 3, 4}, 1);
5519 test_unsqueeze({4, 3, 2}, -3);
5520 test_unsqueeze({22, 33, 11}, 1);
5521 test_unsqueeze({33, 11, 22}, -3);
5522 }
5523
TEST_F(VulkanAPITest,unsqueeze_3dto4d_dim2)5524 TEST_F(VulkanAPITest, unsqueeze_3dto4d_dim2) {
5525 test_unsqueeze({2, 3, 4}, 2);
5526 test_unsqueeze({4, 3, 2}, -2);
5527 test_unsqueeze({22, 33, 11}, 2);
5528 test_unsqueeze({33, 11, 22}, -2);
5529 }
5530
TEST_F(VulkanAPITest,unsqueeze_3dto4d_dim3)5531 TEST_F(VulkanAPITest, unsqueeze_3dto4d_dim3) {
5532 test_unsqueeze({1, 5, 2}, 3);
5533 test_unsqueeze({2, 3, 4}, 3);
5534 test_unsqueeze({4, 3, 2}, -1);
5535 test_unsqueeze({22, 33, 11}, 3);
5536 test_unsqueeze({33, 11, 22}, -1);
5537 }
5538
TEST_F(VulkanAPITest,upsample_nearest2d)5539 TEST_F(VulkanAPITest, upsample_nearest2d) {
5540 const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
5541 const auto out_cpu = at::upsample_nearest2d(in_cpu, {4, 6});
5542
5543 const auto in_vulkan = in_cpu.vulkan();
5544 const auto out_vulkan = at::upsample_nearest2d(in_vulkan, {4, 6});
5545
5546 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5547 if (!check) {
5548 showRtol(out_cpu, out_vulkan.cpu());
5549 }
5550
5551 ASSERT_TRUE(check);
5552 }
5553
TEST_F(VulkanAPITest,upsample_bilinear2d_align_false_small)5554 TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_small) {
5555 const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
5556 const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, false);
5557
5558 const auto in_vulkan = in_cpu.vulkan();
5559 const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, false);
5560
5561 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5562 if (!check) {
5563 showRtol(out_cpu, out_vulkan.cpu());
5564 }
5565
5566 ASSERT_TRUE(check);
5567 }
5568
TEST_F(VulkanAPITest,upsample_bilinear2d_align_false_large)5569 TEST_F(VulkanAPITest, upsample_bilinear2d_align_false_large) {
5570 const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
5571 const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, false);
5572
5573 const auto in_vulkan = in_cpu.vulkan();
5574 const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, false);
5575
5576 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5577 if (!check) {
5578 showRtol(out_cpu, out_vulkan.cpu());
5579 }
5580
5581 ASSERT_TRUE(check);
5582 }
5583
TEST_F(VulkanAPITest,upsample_bilinear2d_align_true_small)5584 TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_small) {
5585 const auto in_cpu = at::rand({1, 2, 2, 3}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
5586 const auto out_cpu = at::upsample_bilinear2d(in_cpu, {4, 6}, true);
5587
5588 const auto in_vulkan = in_cpu.vulkan();
5589 const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {4, 6}, true);
5590
5591 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5592 if (!check) {
5593 showRtol(out_cpu, out_vulkan.cpu());
5594 }
5595
5596 ASSERT_TRUE(check);
5597 }
5598
TEST_F(VulkanAPITest,upsample_bilinear2d_align_true_large)5599 TEST_F(VulkanAPITest, upsample_bilinear2d_align_true_large) {
5600 const auto in_cpu = at::rand({1, 7, 25, 25}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
5601 const auto out_cpu = at::upsample_bilinear2d(in_cpu, {45, 45}, true);
5602
5603 const auto in_vulkan = in_cpu.vulkan();
5604 const auto out_vulkan = at::upsample_bilinear2d(in_vulkan, {45, 45}, true);
5605
5606 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5607 if (!check) {
5608 showRtol(out_cpu, out_vulkan.cpu());
5609 }
5610
5611 ASSERT_TRUE(check);
5612 }
5613
void test_unbind(const at::IntArrayRef input_shape, int64_t dim) {
  const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::unbind(in_cpu, dim);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::unbind(in_vulkan, dim);

  int64_t size = out_vulkan.size();

  for (const auto i : c10::irange(size)) {
    const auto check = almostEqual(out_cpu[i], out_vulkan[i].cpu());
    if (!check) {
      std::cout << "The " << i << "th unbound tensors aren't equal." << std::endl;
      showRtol(out_cpu[i], out_vulkan[i].cpu());
    }

    ASSERT_TRUE(check);
  }
}
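
// Reference: at::unbind(t, dim) returns t.size(dim) tensors, so `size` in the
// helper above equals input_shape[dim]; e.g. unbinding a {3, 2, 5} tensor along
// dim 0 yields three tensors of shape {2, 5}.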

TEST_F(VulkanAPITest, unbind_3d_depth_small) {
  test_unbind({1, 1, 1}, 0);
}

TEST_F(VulkanAPITest, unbind_3d_depth_medium) {
  test_unbind({3, 2, 5}, 0);
}

TEST_F(VulkanAPITest, unbind_3d_depth_large) {
  test_unbind({100, 1, 144}, 0);
}

void test_var(const at::IntArrayRef input_shape, const at::IntArrayRef dim_list, bool unbiased=true, bool keepdim=false) {
  c10::InferenceMode mode;

  const auto in_cpu = at::rand(input_shape, at::TensorOptions(at::kCPU).dtype(at::kFloat));
  const auto out_cpu = at::var(in_cpu, dim_list, unbiased, keepdim);

  const auto in_vulkan = in_cpu.vulkan();
  const auto out_vulkan = at::var(in_vulkan, dim_list, unbiased, keepdim);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, var_2d_unbiased) {
  test_var({3, 5}, {1}, true, true);
  test_var({3, 5}, {1}, true, false);

  // When input.dim() == dim_list.size(), only keepdim == true is supported.
  test_var({3, 5}, {0, 1}, true, true);
}

TEST_F(VulkanAPITest, var_2d_biased) {
  test_var({3, 5}, {1}, false, true);
  test_var({3, 5}, {1}, false, false);

  // When input.dim() == dim_list.size(), only keepdim == true is supported.
  test_var({3, 5}, {0, 1}, false, true);
}
5678
TEST_F(VulkanAPITest,var_3d_unbiased)5679 TEST_F(VulkanAPITest, var_3d_unbiased) {
5680 test_var({3, 5, 7}, {1}, true, true);
5681 test_var({3, 5, 7}, {1}, true, false);
5682
5683 test_var({3, 5, 7}, {0, 1}, true, true);
5684 test_var({3, 5, 7}, {0, 1}, true, false);
5685
5686 test_var({3, 5, 7}, {0, 2}, true, true);
5687 test_var({3, 5, 7}, {0, 2}, true, false);
5688
5689 test_var({3, 5, 7}, {-1, -2}, true, true);
5690 test_var({3, 5, 7}, {-1, -2}, true, false);
5691
5692 test_var({3, 5, 7}, {0, 1, 2}, true, true);
5693 }
5694
TEST_F(VulkanAPITest,var_3d_biased)5695 TEST_F(VulkanAPITest, var_3d_biased) {
5696 test_var({3, 5, 7}, {1}, false, true);
5697 test_var({3, 5, 7}, {1}, false, false);
5698
5699 test_var({3, 5, 7}, {0, 1}, false, true);
5700 test_var({3, 5, 7}, {0, 1}, false, false);
5701
5702 test_var({3, 5, 7}, {0, 2}, false, true);
5703 test_var({3, 5, 7}, {0, 2}, false, false);
5704
5705 test_var({3, 5, 7}, {-1, -2}, false, true);
5706 test_var({3, 5, 7}, {-1, -2}, false, false);
5707
5708 test_var({3, 5, 7}, {0, 1, 2}, false, true);
5709 }
5710
TEST_F(VulkanAPITest,var_4d_unbiased)5711 TEST_F(VulkanAPITest, var_4d_unbiased) {
5712 test_var({3, 5, 7, 11}, {0}, true, true);
5713 test_var({3, 5, 7, 11}, {1}, true, false);
5714
5715 test_var({3, 5, 7, 11}, {0, 1}, true, true);
5716 test_var({3, 5, 7, 11}, {0, 1}, true, false);
5717
5718 test_var({3, 5, 7, 11}, {0, 2}, true, true);
5719 test_var({3, 5, 7, 11}, {0, 2}, true, false);
5720
5721 test_var({3, 5, 7, 11}, {-1, -2}, true, true);
5722 test_var({3, 5, 7, 11}, {-1, -2}, true, false);
5723
5724 test_var({3, 5, 7, 11}, {0, 1, 2}, true, true);
5725 test_var({3, 5, 7, 11}, {0, -1, 2}, true, false);
5726
5727 test_var({3, 5, 7, 11}, {0, 1, 2, 3}, true, true);
5728 }
5729
TEST_F(VulkanAPITest,var_4d_biased)5730 TEST_F(VulkanAPITest, var_4d_biased) {
5731 test_var({3, 5, 7, 11}, {0}, false, true);
5732 test_var({3, 5, 7, 11}, {1}, false, false);
5733
5734 test_var({3, 5, 7, 11}, {0, 1}, false, true);
5735 test_var({3, 5, 7, 11}, {0, 1}, false, false);
5736
5737 test_var({3, 5, 7, 11}, {0, 2}, false, true);
5738 test_var({3, 5, 7, 11}, {0, 2}, false, false);
5739
5740 test_var({3, 5, 7, 11}, {-1, -2}, false, true);
5741 test_var({3, 5, 7, 11}, {-1, -2}, false, false);
5742
5743 test_var({3, 5, 7, 11}, {0, 1, 2}, false, true);
5744 test_var({3, 5, 7, 11}, {0, -1, 2}, false, false);
5745
5746 test_var({3, 5, 7, 11}, {0, 1, 2, 3}, false, true);
5747 }
5748
TEST_F(VulkanAPITest,view_explicit)5749 TEST_F(VulkanAPITest, view_explicit) {
5750 c10::InferenceMode mode;
5751
5752 const auto in_cpu = at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat));
5753 const auto in_vulkan = in_cpu.vulkan();
5754
5755 const std::array<int64_t, 4> shape{7, 8, 9, 1};
5756
5757 const auto out_cpu = in_cpu.view(shape);
5758 const auto out_vulkan = in_vulkan.view(shape);
5759
5760 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5761 if (!check) {
5762 showRtol(out_cpu, out_vulkan.cpu());
5763 }
5764
5765 ASSERT_TRUE(check);
5766 }
5767
TEST_F(VulkanAPITest, view_inferred) {
  c10::InferenceMode mode;

  const auto in_cpu = at::rand({7, 11, 8, 9}, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_vulkan = in_cpu.vulkan();

  const std::array<int64_t, 3> shape{7, 11, -1};

  const auto out_cpu = in_cpu.view(shape);
  const auto out_vulkan = in_vulkan.view(shape);

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
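
// Reference: view() infers at most one dimension (the one marked -1) from the
// element count, so {7, 11, -1} on a {7, 11, 8, 9} input resolves to
// {7, 11, 72}.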
5786
TEST_F(VulkanAPITest,view_invalid_inputs)5787 TEST_F(VulkanAPITest, view_invalid_inputs) {
5788 c10::InferenceMode mode;
5789
5790 // Act: only one dimension can be inferred
5791 EXPECT_THROW({
5792 at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
5793 .vulkan().view({7, -1, -1});
5794 }, ::std::runtime_error);
5795
5796 // Act: invalid shape dimension
5797 EXPECT_THROW({
5798 at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
5799 .vulkan().view({7, 8, -2});
5800 }, ::std::exception);
5801
5802 // Act: incompatible shape
5803 EXPECT_THROW({
5804 at::rand({7, 8, 9}, at::device(at::kCPU).dtype(at::kFloat))
5805 .vulkan().view({7, 70});
5806 }, ::std::runtime_error);
5807 }
5808
TEST_F(VulkanAPITest,cat_4d_dim0_invalidinputs_exceptions)5809 TEST_F(VulkanAPITest, cat_4d_dim0_invalidinputs_exceptions) {
5810 // Arrange: Vulkan cat inputs must have matching sizes except concatenated dimension
5811 {
5812 const auto in_cpu1 = at::rand({3, 5, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5813 const auto in_cpu2 = at::rand({3, 9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
5814 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
5815
5816 // Act
5817 EXPECT_THROW({
5818 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
5819 }, ::std::exception);
5820 }
5821
5822 // Arrange: Vulkan cat expects 4 dimensional inputs
5823 {
5824 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5825 const auto in_cpu2 = at::rand({9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
5826 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
5827
5828 // Act
5829 EXPECT_THROW({
5830 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
5831 }, ::std::exception);
5832 }
5833 }
5834
TEST_F(VulkanAPITest,cat_4d_dim0_samebatch_success)5835 TEST_F(VulkanAPITest, cat_4d_dim0_samebatch_success) {
5836 // Arrange
5837 const auto in_cpu1 = at::rand({221, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5838 const auto in_cpu2 = at::rand({221, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5839 const auto in_cpu3 = at::rand({221, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5840
5841 // Act
5842 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
5843 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0); // dim=batch
5844
5845 // Assert
5846 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5847 if (!check) {
5848 showRtol(out_cpu, out_vulkan.cpu());
5849 }
5850
5851 ASSERT_TRUE(check);
5852 }
5853
TEST_F(VulkanAPITest,cat_4d_dim0_diffbatch_success)5854 TEST_F(VulkanAPITest, cat_4d_dim0_diffbatch_success) {
5855 // Arrange
5856 const auto in_cpu1 = at::rand({221, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5857 const auto in_cpu2 = at::rand({117, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5858 const auto in_cpu3 = at::rand({139, 3, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
5859
5860 // Act
5861 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
5862 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0); // dim=batch
5863
5864 // Assert
5865 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5866 if (!check) {
5867 showRtol(out_cpu, out_vulkan.cpu());
5868 }
5869
5870 ASSERT_TRUE(check);
5871 }
5872
TEST_F(VulkanAPITest,cat_4d_dim0_singledepth_success)5873 TEST_F(VulkanAPITest, cat_4d_dim0_singledepth_success) {
5874 // Arrange: batch x channel (1x1) = single depth texture
5875 const auto in_cpu1 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5876 const auto in_cpu2 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5877 const auto in_cpu3 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5878
5879 // Act
5880 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
5881 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0); // dim=batch
5882
5883 // Assert
5884 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5885 if (!check) {
5886 showRtol(out_cpu, out_vulkan.cpu());
5887 }
5888
5889 ASSERT_TRUE(check);
5890 }
5891
TEST_F(VulkanAPITest, cat_4d_dim0_singletensor_success) {
  // Arrange: single input tensor
  const auto in_cpu1 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));

  // Act
  const auto out_cpu = at::cat({in_cpu1}, 0);
  const auto out_vulkan = at::cat({in_cpu1.vulkan()}, 0); // dim=batch

  // Assert
  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
5908
TEST_F(VulkanAPITest,cat_4d_dim0_twotensors_success)5909 TEST_F(VulkanAPITest, cat_4d_dim0_twotensors_success) {
5910 // Arrange: two input tensors
5911 const auto in_cpu1 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5912 const auto in_cpu2 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
5913
5914 // Act
5915 const auto out_cpu = at::cat({in_cpu1, in_cpu2}, 0);
5916 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan()}, 0); // dim=batch
5917
5918 // Assert
5919 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5920 if (!check) {
5921 showRtol(out_cpu, out_vulkan.cpu());
5922 }
5923
5924 ASSERT_TRUE(check);
5925 }
5926
TEST_F(VulkanAPITest,cat_4d_dim0_negdim_success)5927 TEST_F(VulkanAPITest, cat_4d_dim0_negdim_success) {
5928 // Arrange
5929 const auto in_cpu1 = at::rand({221, 9, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5930 const auto in_cpu2 = at::rand({113, 9, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5931 const auto in_cpu3 = at::rand({331, 9, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5932
5933 // Act
5934 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -4);
5935 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -4);
5936
5937 // Assert
5938 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5939 if (!check) {
5940 showRtol(out_cpu, out_vulkan.cpu());
5941 }
5942
5943 ASSERT_TRUE(check);
5944 }
5945
TEST_F(VulkanAPITest,cat_4d_dim1_negdim_success)5946 TEST_F(VulkanAPITest, cat_4d_dim1_negdim_success) {
5947 // Arrange
5948 const auto in_cpu1 = at::rand({9, 221, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5949 const auto in_cpu2 = at::rand({9, 113, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5950 const auto in_cpu3 = at::rand({9, 331, 193, 3}, at::device(at::kCPU).dtype(at::kFloat));
5951
5952 // Act
5953 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -3);
5954 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -3);
5955
5956 // Assert
5957 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5958 if (!check) {
5959 showRtol(out_cpu, out_vulkan.cpu());
5960 }
5961
5962 ASSERT_TRUE(check);
5963 }
5964
TEST_F(VulkanAPITest,cat_4d_dim2_negdim_success)5965 TEST_F(VulkanAPITest, cat_4d_dim2_negdim_success) {
5966 // Arrange
5967 const auto in_cpu1 = at::rand({9, 193, 221, 3}, at::device(at::kCPU).dtype(at::kFloat));
5968 const auto in_cpu2 = at::rand({9, 193, 113, 3}, at::device(at::kCPU).dtype(at::kFloat));
5969 const auto in_cpu3 = at::rand({9, 193, 331, 3}, at::device(at::kCPU).dtype(at::kFloat));
5970
5971 // Act
5972 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -2);
5973 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -2);
5974
5975 // Assert
5976 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5977 if (!check) {
5978 showRtol(out_cpu, out_vulkan.cpu());
5979 }
5980
5981 ASSERT_TRUE(check);
5982 }
5983
TEST_F(VulkanAPITest,cat_4d_dim3_negdim_success)5984 TEST_F(VulkanAPITest, cat_4d_dim3_negdim_success) {
5985 // Arrange
5986 const auto in_cpu1 = at::rand({9, 193, 3, 221}, at::device(at::kCPU).dtype(at::kFloat));
5987 const auto in_cpu2 = at::rand({9, 193, 3, 113}, at::device(at::kCPU).dtype(at::kFloat));
5988 const auto in_cpu3 = at::rand({9, 193, 3, 331}, at::device(at::kCPU).dtype(at::kFloat));
5989
5990 // Act
5991 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -1);
5992 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -1);
5993
5994 // Assert
5995 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
5996 if (!check) {
5997 showRtol(out_cpu, out_vulkan.cpu());
5998 }
5999
6000 ASSERT_TRUE(check);
6001 }
6002
6003 #if !defined(__APPLE__)
TEST_F(VulkanAPITest,DISABLED_cat_4d_dim1_samefeature_success)6004 TEST_F(VulkanAPITest, DISABLED_cat_4d_dim1_samefeature_success) {
6005 // Arrange
6006 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6007 const auto in_cpu2 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6008 const auto in_cpu3 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6009
6010 // Act
6011 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6012 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6013
6014 // Assert
6015 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6016 if (!check) {
6017 showRtol(out_cpu, out_vulkan.cpu());
6018 }
6019
6020 ASSERT_TRUE(check);
6021 }
6022
TEST_F(VulkanAPITest,DISABLED_cat_4d_dim1_difffeature_success)6023 TEST_F(VulkanAPITest, DISABLED_cat_4d_dim1_difffeature_success) {
6024 // Arrange
6025 const auto in_cpu1 = at::rand({3, 3, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6026 const auto in_cpu2 = at::rand({3, 8, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6027 const auto in_cpu3 = at::rand({3, 11, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6028
6029 // Act
6030 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6031 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6032
6033 // Assert
6034 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6035 if (!check) {
6036 showRtol(out_cpu, out_vulkan.cpu());
6037 }
6038
6039 ASSERT_TRUE(check);
6040 }
6041
TEST_F(VulkanAPITest,cat_4d_dim1_texture2d_success)6042 TEST_F(VulkanAPITest, cat_4d_dim1_texture2d_success) {
6043 // Arrange: 2D Texture (VK_IMAGE_VIEW_TYPE_2D)
6044 const auto in_cpu1 = at::rand({2, 3, 2, 2}, at::device(at::kCPU).dtype(at::kFloat));
6045 const auto in_cpu2 = at::rand({2, 3, 2, 2}, at::device(at::kCPU).dtype(at::kFloat));
6046 const auto in_cpu3 = at::rand({2, 3, 2, 2}, at::device(at::kCPU).dtype(at::kFloat));
6047
6048 // Act
6049 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6050 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6051
6052 // Assert
6053 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6054 if (!check) {
6055 showRtol(out_cpu, out_vulkan.cpu());
6056 }
6057
6058 ASSERT_TRUE(check);
6059 }
6060 #endif /* !defined(__APPLE__) */
6061
TEST_F(VulkanAPITest,cat_4d_dim1_singledepth_success)6062 TEST_F(VulkanAPITest, cat_4d_dim1_singledepth_success) {
6063 // Arrange: batch x channel (1x1) = single depth texture
6064 const auto in_cpu1 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6065 const auto in_cpu2 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6066 const auto in_cpu3 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6067
6068 // Act
6069 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6070 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6071
6072 // Assert
6073 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6074 if (!check) {
6075 showRtol(out_cpu, out_vulkan.cpu());
6076 }
6077
6078 ASSERT_TRUE(check);
6079 }
6080
TEST_F(VulkanAPITest, cat_4d_dim1_singletensor_success) {
  // Arrange: single input tensor
  const auto in_cpu1 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));

  // Act
  const auto out_cpu = at::cat({in_cpu1}, 1);
  const auto out_vulkan = at::cat({in_cpu1.vulkan()}, 1); // dim=feature(channel)

  // Assert
  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
6097
TEST_F(VulkanAPITest,DISABLED_cat_4d_dim1_twotensors_success)6098 TEST_F(VulkanAPITest, DISABLED_cat_4d_dim1_twotensors_success) {
6099 // Arrange: two input tensors
6100 const auto in_cpu1 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6101 const auto in_cpu2 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6102
6103 // Act
6104 const auto out_cpu = at::cat({in_cpu1, in_cpu2}, 1);
6105 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan()}, 1); // dim=feature(channel)
6106
6107 // Assert
6108 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6109 if (!check) {
6110 showRtol(out_cpu, out_vulkan.cpu());
6111 }
6112
6113 ASSERT_TRUE(check);
6114 }
6115
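
// Note on the mult4ch cases below: the Vulkan backend packs channels into
// 4-component texels, so channel counts that are multiples of 4 presumably keep
// each input texel-aligned when concatenating along the channel dimension.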
TEST_F(VulkanAPITest, cat_4d_dim1_bat1_mult4ch_success) {
  // Arrange: batch=1 and every channel count is a multiple of 4 (channel % 4 == 0)
  const auto in_cpu1 = at::rand({1, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_cpu2 = at::rand({1, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_cpu3 = at::rand({1, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));

  // Act
  const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
  const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)

  // Assert
  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}
6134
TEST_F(VulkanAPITest,cat_4d_dim1_bat2_mult4ch_success)6135 TEST_F(VulkanAPITest, cat_4d_dim1_bat2_mult4ch_success) {
6136 // Arrange: batch=2 and channel (a multiple of 4 <-> channel %4 == 0)
6137 const auto in_cpu1 = at::rand({2, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6138 const auto in_cpu2 = at::rand({2, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6139 const auto in_cpu3 = at::rand({2, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6140
6141 // Act
6142 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6143 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6144
6145 // Assert
6146 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6147 if (!check) {
6148 showRtol(out_cpu, out_vulkan.cpu());
6149 }
6150
6151 ASSERT_TRUE(check);
6152 }
6153
TEST_F(VulkanAPITest,cat_4d_dim1_mult4ch_mixed_success)6154 TEST_F(VulkanAPITest, cat_4d_dim1_mult4ch_mixed_success) {
6155 // Arrange: batch=1 and channel (different multiples of 4 <-> channel %4 == 0)
6156 const auto in_cpu1 = at::rand({3, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6157 const auto in_cpu2 = at::rand({3, 8, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6158 const auto in_cpu3 = at::rand({3, 12, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6159
6160 // Act
6161 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6162 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1); // dim=feature(channel)
6163
6164 // Assert
6165 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6166 if (!check) {
6167 showRtol(out_cpu, out_vulkan.cpu());
6168 }
6169
6170 ASSERT_TRUE(check);
6171 }
6172
TEST_F(VulkanAPITest,DISABLED_cat_4d_dim1_mult4ch_nonmult4ch_success)6173 TEST_F(VulkanAPITest, DISABLED_cat_4d_dim1_mult4ch_nonmult4ch_success) {
6174 // Arrange: batch=1 and channel (a mixed set of multiples and non-multiples of 4)
6175 const auto in_cpu1 = at::rand({3, 3, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6176 const auto in_cpu2 = at::rand({3, 4, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6177 const auto in_cpu3 = at::rand({3, 7, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6178 const auto in_cpu4 = at::rand({3, 8, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6179
6180 // Act
6181 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3, in_cpu4}, 1);
6182 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan(), in_cpu4.vulkan()}, 1); // dim=feature(channel)
6183
6184 // Assert
6185 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6186 if (!check) {
6187 showRtol(out_cpu, out_vulkan.cpu());
6188 }
6189
6190 ASSERT_TRUE(check);
6191 }
6192
TEST_F(VulkanAPITest,cat_4d_dim2_sameheight_success)6193 TEST_F(VulkanAPITest, cat_4d_dim2_sameheight_success) {
6194 // Arrange
6195 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6196 const auto in_cpu2 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6197 const auto in_cpu3 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6198
6199 // Act
6200 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 2);
6201 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6202
6203 // Assert
6204 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6205 if (!check) {
6206 showRtol(out_cpu, out_vulkan.cpu());
6207 }
6208
6209 ASSERT_TRUE(check);
6210 }
6211
TEST_F(VulkanAPITest,cat_4d_dim2_diffheight_success)6212 TEST_F(VulkanAPITest, cat_4d_dim2_diffheight_success) {
6213 // Arrange
6214 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6215 const auto in_cpu2 = at::rand({3, 9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
6216 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6217
6218 // Act
6219 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 2);
6220 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6221
6222 // Assert
6223 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6224 if (!check) {
6225 showRtol(out_cpu, out_vulkan.cpu());
6226 }
6227
6228 ASSERT_TRUE(check);
6229 }
6230
TEST_F(VulkanAPITest,cat_4d_dim2_singledepth_success)6231 TEST_F(VulkanAPITest, cat_4d_dim2_singledepth_success) {
6232 // Arrange: batch x channel (1x1) = single depth texture
6233 const auto in_cpu1 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6234 const auto in_cpu2 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6235 const auto in_cpu3 = at::rand({1, 1, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6236
6237 // Act
6238 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 2);
6239 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6240
6241 // Assert
6242 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6243 if (!check) {
6244 showRtol(out_cpu, out_vulkan.cpu());
6245 }
6246
6247 ASSERT_TRUE(check);
6248 }
6249
TEST_F(VulkanAPITest,cat_4d_dim2_invalidinputs_exceptions)6250 TEST_F(VulkanAPITest, cat_4d_dim2_invalidinputs_exceptions) {
6251 // Arrange: Vulkan cat inputs must have matching sizes except concatenated dimension
6252 {
6253 const auto in_cpu1 = at::rand({3, 5, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6254 const auto in_cpu2 = at::rand({3, 9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
6255 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6256
6257 // Act
6258 EXPECT_THROW({
6259 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6260 }, ::std::exception);
6261 }
6262
6263 // Arrange: Vulkan cat expects inputs of same dimensions
6264 {
6265 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6266 const auto in_cpu2 = at::rand({9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
6267 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6268
6269 // Act
6270 EXPECT_THROW({
6271 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6272 }, ::std::exception);
6273 }
6274 }
6275
TEST_F(VulkanAPITest,cat_4d_dim3_invalidinputs_exceptions)6276 TEST_F(VulkanAPITest, cat_4d_dim3_invalidinputs_exceptions) {
6277 // Arrange: Vulkan cat inputs must have matching sizes except concatenated dimension
6278 {
6279 const auto in_cpu1 = at::rand({3, 5, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6280 const auto in_cpu2 = at::rand({3, 9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
6281 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6282
6283 // Act
6284 EXPECT_THROW({
6285 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 3);
6286 }, ::std::exception);
6287 }
6288
6289 // Arrange: Vulkan cat expects 4 dimensional inputs
6290 {
6291 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6292 const auto in_cpu2 = at::rand({9, 112, 193}, at::device(at::kCPU).dtype(at::kFloat));
6293 const auto in_cpu3 = at::rand({3, 9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6294
6295 // Act
6296 EXPECT_THROW({
6297 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 3);
6298 }, ::std::exception);
6299 }
6300 }
6301
TEST_F(VulkanAPITest,cat_4d_dim3_samewidth_success)6302 TEST_F(VulkanAPITest, cat_4d_dim3_samewidth_success) {
6303 // Arrange
6304 const auto in_cpu1 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6305 const auto in_cpu2 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6306 const auto in_cpu3 = at::rand({3, 9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6307
6308 // Act
6309 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 3);
6310 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 3);
6311
6312 // Assert
6313 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6314 if (!check) {
6315 showRtol(out_cpu, out_vulkan.cpu());
6316 }
6317
6318 ASSERT_TRUE(check);
6319 }
6320
6321 TEST_F(VulkanAPITest, cat_4d_dim3_diffwidth_success) {
6322 // Arrange
6323 const auto in_cpu1 = at::rand({3, 9, 193, 221}, at::device(at::kCPU).dtype(at::kFloat));
6324 const auto in_cpu2 = at::rand({3, 9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6325 const auto in_cpu3 = at::rand({3, 9, 193, 331}, at::device(at::kCPU).dtype(at::kFloat));
6326
6327 // Act
6328 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 3);
6329 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 3);
6330
6331 // Assert
6332 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6333 if (!check) {
6334 showRtol(out_cpu, out_vulkan.cpu());
6335 }
6336
6337 ASSERT_TRUE(check);
6338 }
6339
6340 TEST_F(VulkanAPITest, cat_3d_dim0_mult4ch_success) {
6341 // Arrange
6342 const auto in_cpu1 = at::rand({4, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6343 const auto in_cpu2 = at::rand({4, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6344 const auto in_cpu3 = at::rand({4, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6345
6346 // Act
6347 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6348 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6349
6350 // Assert
6351 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6352 if (!check) {
6353 showRtol(out_cpu, out_vulkan.cpu());
6354 }
6355
6356 ASSERT_TRUE(check);
6357 }
6358
6359 TEST_F(VulkanAPITest, cat_3d_dim0_diff_channel_success) {
6360 // Arrange
6361 const auto in_cpu1 = at::rand({221, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6362 const auto in_cpu2 = at::rand({113, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6363 const auto in_cpu3 = at::rand({331, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6364
6365 // Act
6366 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6367 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6368
6369 // Assert
6370 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6371 if (!check) {
6372 showRtol(out_cpu, out_vulkan.cpu());
6373 }
6374
6375 ASSERT_TRUE(check);
6376 }
6377
6378 TEST_F(VulkanAPITest, cat_3d_dim0_same_channel_success) {
6379 // Arrange
6380 const auto in_cpu1 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6381 const auto in_cpu2 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6382 const auto in_cpu3 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6383
6384 // Act
6385 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6386 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6387
6388 // Assert
6389 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6390 if (!check) {
6391 showRtol(out_cpu, out_vulkan.cpu());
6392 }
6393
6394 ASSERT_TRUE(check);
6395 }
6396
6397 TEST_F(VulkanAPITest, cat_3d_dim1_diffheight_success) {
6398 // Arrange
6399 const auto in_cpu1 = at::rand({9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6400 const auto in_cpu2 = at::rand({9, 113, 193}, at::device(at::kCPU).dtype(at::kFloat));
6401 const auto in_cpu3 = at::rand({9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6402
6403 // Act
6404 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6405 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1);
6406
6407 // Assert
6408 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6409 if (!check) {
6410 showRtol(out_cpu, out_vulkan.cpu());
6411 }
6412
6413 ASSERT_TRUE(check);
6414 }
6415
6416 TEST_F(VulkanAPITest, cat_3d_dim1_same_height_success) {
6417 // Arrange
6418 const auto in_cpu1 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6419 const auto in_cpu2 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6420 const auto in_cpu3 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6421
6422 // Act
6423 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6424 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1);
6425
6426 // Assert
6427 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6428 if (!check) {
6429 showRtol(out_cpu, out_vulkan.cpu());
6430 }
6431
6432 ASSERT_TRUE(check);
6433 }
6434
6435 TEST_F(VulkanAPITest, cat_3d_dim2_diffwidth_success) {
6436 // Arrange
6437 const auto in_cpu1 = at::rand({9, 193, 221}, at::device(at::kCPU).dtype(at::kFloat));
6438 const auto in_cpu2 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6439 const auto in_cpu3 = at::rand({9, 193, 331}, at::device(at::kCPU).dtype(at::kFloat));
6440
6441 // Act
6442 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 2);
6443 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6444
6445 // Assert
6446 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6447 if (!check) {
6448 showRtol(out_cpu, out_vulkan.cpu());
6449 }
6450
6451 ASSERT_TRUE(check);
6452 }
6453
6454 TEST_F(VulkanAPITest, cat_3d_dim2_samewidth_success) {
6455 // Arrange
6456 const auto in_cpu1 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6457 const auto in_cpu2 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6458 const auto in_cpu3 = at::rand({9, 193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6459
6460 // Act
6461 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 2);
6462 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 2);
6463
6464 // Assert
6465 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6466 if (!check) {
6467 showRtol(out_cpu, out_vulkan.cpu());
6468 }
6469
6470 ASSERT_TRUE(check);
6471 }
6472
6473 TEST_F(VulkanAPITest, cat_3d_dim0_negdim_success) {
6474 // Arrange
6475 const auto in_cpu1 = at::rand({221, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6476 const auto in_cpu2 = at::rand({113, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6477 const auto in_cpu3 = at::rand({331, 9, 193}, at::device(at::kCPU).dtype(at::kFloat));
6478
6479 // Act
6480 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -3);
6481 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -3);
6482
6483 // Assert
6484 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6485 if (!check) {
6486 showRtol(out_cpu, out_vulkan.cpu());
6487 }
6488
6489 ASSERT_TRUE(check);
6490 }
6491
6492 TEST_F(VulkanAPITest, cat_3d_dim1_negdim_success) {
6493 // Arrange
6494 const auto in_cpu1 = at::rand({9, 221, 193}, at::device(at::kCPU).dtype(at::kFloat));
6495 const auto in_cpu2 = at::rand({9, 113, 193}, at::device(at::kCPU).dtype(at::kFloat));
6496 const auto in_cpu3 = at::rand({9, 331, 193}, at::device(at::kCPU).dtype(at::kFloat));
6497
6498 // Act
6499 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -2);
6500 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -2);
6501
6502 // Assert
6503 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6504 if (!check) {
6505 showRtol(out_cpu, out_vulkan.cpu());
6506 }
6507
6508 ASSERT_TRUE(check);
6509 }
6510
6511 TEST_F(VulkanAPITest, cat_3d_dim2_negdim_success) {
6512 // Arrange
6513 const auto in_cpu1 = at::rand({193, 13, 89}, at::device(at::kCPU).dtype(at::kFloat));
6514 const auto in_cpu2 = at::rand({193, 13, 59}, at::device(at::kCPU).dtype(at::kFloat));
6515 const auto in_cpu3 = at::rand({193, 13, 67}, at::device(at::kCPU).dtype(at::kFloat));
6516
6517 // Act
6518 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -1);
6519 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -1);
6520
6521 // Assert
6522 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6523 if (!check) {
6524 showRtol(out_cpu, out_vulkan.cpu());
6525 }
6526
6527 ASSERT_TRUE(check);
6528 }
6529
6530 TEST_F(VulkanAPITest, cat_2d_dim0_same_height_success) {
6531 // Arrange
6532 const auto in_cpu1 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6533 const auto in_cpu2 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6534 const auto in_cpu3 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6535
6536 // Act
6537 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6538 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6539
6540 // Assert
6541 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6542 if (!check) {
6543 showRtol(out_cpu, out_vulkan.cpu());
6544 }
6545
6546 ASSERT_TRUE(check);
6547 }
6548
6549 TEST_F(VulkanAPITest, cat_2d_dim0_diff_height_success) {
6550 // Arrange
6551 const auto in_cpu1 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6552 const auto in_cpu2 = at::rand({191, 113}, at::device(at::kCPU).dtype(at::kFloat));
6553 const auto in_cpu3 = at::rand({137, 113}, at::device(at::kCPU).dtype(at::kFloat));
6554
6555 // Act
6556 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6557 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6558
6559 // Assert
6560 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6561 if (!check) {
6562 showRtol(out_cpu, out_vulkan.cpu());
6563 }
6564
6565 ASSERT_TRUE(check);
6566 }
6567
6568 TEST_F(VulkanAPITest, cat_2d_dim1_same_width_success) {
6569 // Arrange
6570 const auto in_cpu1 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6571 const auto in_cpu2 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6572 const auto in_cpu3 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6573
6574 // Act
6575 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6576 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1);
6577
6578 // Assert
6579 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6580 if (!check) {
6581 showRtol(out_cpu, out_vulkan.cpu());
6582 }
6583
6584 ASSERT_TRUE(check);
6585 }
6586
6587 TEST_F(VulkanAPITest, cat_2d_dim1_diff_width_success) {
6588 // Arrange
6589 const auto in_cpu1 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6590 const auto in_cpu2 = at::rand({193, 131}, at::device(at::kCPU).dtype(at::kFloat));
6591 const auto in_cpu3 = at::rand({193, 127}, at::device(at::kCPU).dtype(at::kFloat));
6592
6593 // Act
6594 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 1);
6595 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 1);
6596
6597 // Assert
6598 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6599 if (!check) {
6600 showRtol(out_cpu, out_vulkan.cpu());
6601 }
6602
6603 ASSERT_TRUE(check);
6604 }
6605
6606 TEST_F(VulkanAPITest, cat_2d_dim0_negdim_success) {
6607 // Arrange
6608 const auto in_cpu1 = at::rand({113, 193}, at::device(at::kCPU).dtype(at::kFloat));
6609 const auto in_cpu2 = at::rand({131, 193}, at::device(at::kCPU).dtype(at::kFloat));
6610 const auto in_cpu3 = at::rand({127, 193}, at::device(at::kCPU).dtype(at::kFloat));
6611
6612 // Act
6613 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -2);
6614 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -2);
6615
6616 // Assert
6617 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6618 if (!check) {
6619 showRtol(out_cpu, out_vulkan.cpu());
6620 }
6621
6622 ASSERT_TRUE(check);
6623 }
6624
6625 TEST_F(VulkanAPITest, cat_2d_dim1_negdim_success) {
6626 // Arrange
6627 const auto in_cpu1 = at::rand({193, 113}, at::device(at::kCPU).dtype(at::kFloat));
6628 const auto in_cpu2 = at::rand({193, 131}, at::device(at::kCPU).dtype(at::kFloat));
6629 const auto in_cpu3 = at::rand({193, 127}, at::device(at::kCPU).dtype(at::kFloat));
6630
6631 // Act
6632 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -1);
6633 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -1);
6634
6635 // Assert
6636 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6637 if (!check) {
6638 showRtol(out_cpu, out_vulkan.cpu());
6639 }
6640
6641 ASSERT_TRUE(check);
6642 }
6643
6644 TEST_F(VulkanAPITest, cat_1d_dim0_same_width_success) {
6645 // Arrange
6646 const auto in_cpu1 = at::rand({193}, at::device(at::kCPU).dtype(at::kFloat));
6647 const auto in_cpu2 = at::rand({193}, at::device(at::kCPU).dtype(at::kFloat));
6648 const auto in_cpu3 = at::rand({193}, at::device(at::kCPU).dtype(at::kFloat));
6649
6650 // Act
6651 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6652 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6653
6654 // Assert
6655 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6656 if (!check) {
6657 showRtol(out_cpu, out_vulkan.cpu());
6658 }
6659
6660 ASSERT_TRUE(check);
6661 }
6662
6663 TEST_F(VulkanAPITest, cat_1d_dim0_diff_width_success) {
6664 // Arrange
6665 const auto in_cpu1 = at::rand({193}, at::device(at::kCPU).dtype(at::kFloat));
6666 const auto in_cpu2 = at::rand({137}, at::device(at::kCPU).dtype(at::kFloat));
6667 const auto in_cpu3 = at::rand({131}, at::device(at::kCPU).dtype(at::kFloat));
6668
6669 // Act
6670 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, 0);
6671 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, 0);
6672
6673 // Assert
6674 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6675 if (!check) {
6676 showRtol(out_cpu, out_vulkan.cpu());
6677 }
6678
6679 ASSERT_TRUE(check);
6680 }
6681
6682 TEST_F(VulkanAPITest, cat_1d_dim0_negdim_success) {
6683 // Arrange
6684 const auto in_cpu1 = at::rand({193}, at::device(at::kCPU).dtype(at::kFloat));
6685 const auto in_cpu2 = at::rand({137}, at::device(at::kCPU).dtype(at::kFloat));
6686 const auto in_cpu3 = at::rand({131}, at::device(at::kCPU).dtype(at::kFloat));
6687
6688 // Act
6689 const auto out_cpu = at::cat({in_cpu1, in_cpu2, in_cpu3}, -1);
6690 const auto out_vulkan = at::cat({in_cpu1.vulkan(), in_cpu2.vulkan(), in_cpu3.vulkan()}, -1);
6691
6692 // Assert
6693 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6694 if (!check) {
6695 showRtol(out_cpu, out_vulkan.cpu());
6696 }
6697
6698 ASSERT_TRUE(check);
6699 }
6700
6701 TEST_F(VulkanAPITest, permute_2d_success) {
6702 // Arrange
6703 const auto in_cpu = at::rand({2, 3}, at::device(at::kCPU).dtype(at::kFloat));
6704
6705 // Act
6706 const auto out_cpu = at::permute(in_cpu, {1, 0});
6707 const auto out_vulkan = at::permute(in_cpu.vulkan(), {1, 0});
6708
6709 // Assert
6710 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6711 if (!check) {
6712 showRtol(out_cpu, out_vulkan.cpu());
6713 }
6714
6715 ASSERT_TRUE(check);
6716 }
6717
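// Note: gen_allpermutations (a helper defined earlier in this file) is assumed to
// fill all_dims with every permutation of the index vector, with index 0 holding
// the identity permutation; the loops below start at 1 to skip that no-op case.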
6718 TEST_F(VulkanAPITest, permute_3d_success) {
6719 // Arrange
6720 const auto in_cpu = at::rand({2, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
6721 std::vector<std::vector<int64_t>> all_dims;
6722 std::vector<int64_t> in{0, 1, 2};
6723 gen_allpermutations(all_dims, in, 0);
6724
6725 for (const auto i : c10::irange(1, all_dims.size())) {
6726 const auto dims = all_dims[i];
6727
6728 // Act
6729 const auto out_cpu = at::permute(in_cpu, dims);
6730 const auto out_vulkan = at::permute(in_cpu.vulkan(), dims);
6731
6732 // Assert
6733 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6734 if (!check) {
6735 showRtol(out_cpu, out_vulkan.cpu());
6736 }
6737
6738 ASSERT_TRUE(check);
6739 }
6740 }
6741
6742 TEST_F(VulkanAPITest, permute_4d_success) {
6743 // Arrange
6744 const auto in_cpu = at::rand({2, 3, 4, 5}, at::device(at::kCPU).dtype(at::kFloat));
6745 std::vector<std::vector<int64_t>> all_dims;
6746 std::vector<int64_t> in{0, 1, 2, 3};
6747 gen_allpermutations(all_dims, in, 0);
6748
6749 for (const auto i : c10::irange(1, all_dims.size())) {
6750 const auto dims = all_dims[i];
6751
6752 // Act
6753 const auto out_cpu = at::permute(in_cpu, dims);
6754 const auto out_vulkan = at::permute(in_cpu.vulkan(), dims);
6755
6756 // Assert
6757 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6758 if (!check) {
6759 showRtol(out_cpu, out_vulkan.cpu());
6760 }
6761
6762 ASSERT_TRUE(check);
6763 }
6764 }
6765
6766 TEST_F(VulkanAPITest, permute_4dmclaren_success) {
6767 // Arrange: McLaren Model usage
6768 const auto in_cpu = at::rand({1, 2, 1, 161}, at::device(at::kCPU).dtype(at::kFloat));
6769
6770 // Act
6771 const auto out_cpu = at::permute(in_cpu, {0, 2, 1, 3});
6772 const auto out_vulkan = at::permute(in_cpu.vulkan(), {0, 2, 1, 3});
6773
6774 // Assert
6775 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6776 if (!check) {
6777 showRtol(out_cpu, out_vulkan.cpu());
6778 }
6779
6780 ASSERT_TRUE(check);
6781 }
6782
6783 TEST_F(VulkanAPITest, permute_4dbig_success) {
6784 // Arrange
6785 const auto in_cpu = at::rand({3, 9, 51, 41}, at::device(at::kCPU).dtype(at::kFloat));
6786 std::vector<std::vector<int64_t>> all_dims;
6787 std::vector<int64_t> in{0, 1, 2, 3};
6788 gen_allpermutations(all_dims, in, 0);
6789
6790 for (const auto i : c10::irange(1, all_dims.size())) {
6791 const auto dims = all_dims[i];
6792 // Act
6793 const auto out_cpu = at::permute(in_cpu, dims);
6794 const auto out_vulkan = at::permute(in_cpu.vulkan(), dims);
6795
6796 // Assert
6797 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6798 if (!check) {
6799 showRtol(out_cpu, out_vulkan.cpu());
6800 }
6801
6802 ASSERT_TRUE(check);
6803 }
6804 }
6805
6806 TEST_F(VulkanAPITest, permute_negativedims_success) {
6807 // Arrange
6808 const auto in_cpu = at::rand({5, 4, 3, 2}, at::device(at::kCPU).dtype(at::kFloat));
6809
6810 // Act: {-1,-2,-3,0} is equivalent to {3,2,1,0}
6811 const auto out_cpu = at::permute(in_cpu, {-1, -2, -3, 0});
6812 const auto out_vulkan = at::permute(in_cpu.vulkan(), {-1, -2, -3, 0});
6813
6814 // Assert
6815 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6816 if (!check) {
6817 showRtol(out_cpu, out_vulkan.cpu());
6818 }
6819
6820 ASSERT_TRUE(check);
6821 }
6822
6823 TEST_F(VulkanAPITest, permute_invalidinputs_exceptions) {
6824 // Arrange
6825 const auto in_cpu = at::rand({1, 2, 1, 161}, at::device(at::kCPU).dtype(at::kFloat));
6826
6827 // Act: Repeated dim
6828 EXPECT_THROW({
6829 const auto out_vulkan = at::permute(in_cpu.vulkan(), {2, 2, 1, 0});
6830 }, ::std::exception);
6831
6832 EXPECT_THROW({
6833 const auto out_vulkan = in_cpu.vulkan();
6834 out_vulkan.permute({2, 2, 1, 0});
6835 }, ::std::exception);
6836
6837   // Act: Number of dims doesn't match
6838 EXPECT_THROW({
6839 const auto out_vulkan = at::permute(in_cpu.vulkan(), {4, 3, 2, 1, 0});
6840 }, ::std::exception);
6841
6842 EXPECT_THROW({
6843 const auto out_vulkan = at::permute(in_cpu.vulkan(), {2, 1, 0});
6844 }, ::std::exception);
6845
6846 EXPECT_THROW({
6847 const auto out_vulkan = in_cpu.vulkan();
6848 out_vulkan.permute({4, 3, 2, 1, 0});
6849 }, ::std::exception);
6850
6851 EXPECT_THROW({
6852 const auto out_vulkan = in_cpu.vulkan();
6853 out_vulkan.permute({2, 1, 0});
6854 }, ::std::exception);
6855
6856 // Act: Dim out of range
6857 EXPECT_THROW({
6858 const auto out_vulkan = at::permute(in_cpu.vulkan(), {5, 2, 1, 0});
6859 }, ::std::exception);
6860
6861 EXPECT_THROW({
6862 const auto out_vulkan = in_cpu.vulkan();
6863 out_vulkan.permute({5, 2, 1, 0});
6864 }, ::std::exception);
6865
6866 // Act: Input tensor size > 4D
6867 const auto in_cpu_5d = at::rand({1, 2, 1, 2, 161}, at::device(at::kCPU).dtype(at::kFloat));
6868 EXPECT_THROW({
6869 const auto out_vulkan_5d = at::permute(in_cpu_5d.vulkan(), {4, 3, 2, 1, 0});
6870 }, ::std::exception);
6871
6872 EXPECT_THROW({
6873 const auto out_vulkan_5d = in_cpu_5d.vulkan();
6874 out_vulkan_5d.permute({4, 3, 2, 1, 0});
6875 }, ::std::exception);
6876 }
6877
6878 TEST_F(VulkanAPITest, slice_width_success) {
6879 // Arrange
6880 std::unordered_map<int64_t, std::vector<int64_t>> dim2sizes {
6881 {3, {2, 3, 40, 50}}, // 4D tensors with dim=width
6882 {2, {3, 40, 50}}, // 3D tensors with dim=width
6883 {1, {40, 50}}, // 2D tensors with dim=width
6884 {0, {50}}, // 1D tensors with dim=width
6885 };
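  // slice_tests (a helper defined earlier in this file) is assumed to exercise a
  // set of start/end/step combinations for each (dim, shape) entry and compare
  // the CPU and Vulkan results.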
6886
6887 // Act/Assert
6888 slice_tests(dim2sizes);
6889 }
6890
6891 TEST_F(VulkanAPITest, slice_height_success) {
6892 // Arrange
6893 std::unordered_map<int64_t, std::vector<int64_t>> dim2sizes {
6894 {2, {2, 3, 40, 50}}, // 4D tensors with dim=height
6895 {1, {3, 40, 50}}, // 3D tensors with dim=height
6896 {0, {40, 50}}, // 2D tensors with dim=height
6897     // 1D tensors don't have a height dim for this test
6898 };
6899
6900 // Act/Assert
6901 slice_tests(dim2sizes);
6902 }
6903
6904 TEST_F(VulkanAPITest, slice_feature_success) {
6905 // Arrange
6906 std::unordered_map<int64_t, std::vector<int64_t>> dim2sizes {
6907 {1, {2, 40, 13, 14}}, // 4D tensors with dim=feature(channel)
6908 {0, {40, 13, 14}}, // 3D tensors with dim=feature(channel)
6909     // 1D and 2D tensors don't have a feature(channel) dim for this test
6910 };
6911
6912 // Act/Assert
6913 slice_tests(dim2sizes);
6914 }
6915
6916 TEST_F(VulkanAPITest, slice_batch_success) {
6917 // Arrange
6918 std::unordered_map<int64_t, std::vector<int64_t>> dim2sizes {
6919 {0, {40, 3, 13, 14}}, // 4D tensors with dim=batch
6920     // 1D, 2D and 3D tensors don't have a batch dim for this test
6921 };
6922
6923 // Act/Assert
6924 slice_tests(dim2sizes);
6925 }
6926
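// The slice_test arguments below are assumed to be (shape, dim, start, end, step),
// matching the helper defined earlier in this file; both cases should yield an
// empty output along the sliced dimension.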
6927 TEST_F(VulkanAPITest, slice_zero_sized) {
6928 // When start == end
6929 slice_test({2, 3, 4, 5}, 3, 0, 0, 1);
6930 // When start > end
6931 slice_test({2, 3, 4, 5}, 3, 3, 2, 1);
6932 }
6933
6934 TEST_F(VulkanAPITest, slice_invalidinputs_exceptions) {
6935 // Act: slice step must be positive
6936 EXPECT_THROW({
6937 slice_test({2, 3, 4, 5}, 3, 0, 3, 0);
6938 }, ::std::exception);
6939 }
6940
6941 TEST_F(VulkanAPITest, stack_invalid_inputs) {
6942 // Act: Vulkan stack expects at least one tensor
6943 EXPECT_THROW({
6944 at::stack({}, 0);
6945 }, ::std::exception);
6946
6947 // Act: Vulkan stack inputs must have matching sizes
6948 EXPECT_THROW({
6949 at::stack({
6950 at::rand({5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
6951 at::rand({5, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan(),
6952 at::rand({6, 7}, at::device(at::kCPU).dtype(at::kFloat)).vulkan()}, 0);
6953 }, ::std::exception);
6954 }
6955
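// Helper: builds numTensors random CPU tensors of input_shape, stacks them on the
// CPU and Vulkan backends along dim, and compares the results.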
6956 void test_stack(const at::IntArrayRef input_shape, int64_t dim, int numTensors) {
6957 std::vector<at::Tensor> tensors_cpu = {};
6958 std::vector<at::Tensor> tensors_vulkan = {};
6959
6960 for (int i = 0; i < numTensors; i++) {
6961 at::Tensor in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
6962 tensors_cpu.emplace_back(in_cpu);
6963 tensors_vulkan.emplace_back(in_cpu.vulkan());
6964 }
6965
6966   at::Tensor out_cpu = at::stack(tensors_cpu, dim);
6967   at::Tensor out_vulkan = at::stack(tensors_vulkan, dim);
6968 const auto check = almostEqual(out_cpu, out_vulkan.cpu());
6969 if (!check) {
6970 std::cout << "Error when stacking " << numTensors << " tensors" << std::endl;
6971 showRtol(out_cpu, out_vulkan.cpu());
6972 }
6973 ASSERT_TRUE(check);
6974 }
6975
6976 TEST_F(VulkanAPITest, stack_0d) {
6977 test_stack({}, 0, 1);
6978 test_stack({}, 0, 2);
6979 test_stack({}, 0, 3);
6980 }
6981
6982 TEST_F(VulkanAPITest, stack_1d) {
6983 test_stack({221}, 0, 2);
6984 test_stack({193}, 1, 3);
6985
6986 test_stack({221}, -1, 2);
6987 test_stack({193}, -2, 3);
6988 }
6989
6990 TEST_F(VulkanAPITest, stack_2d) {
6991 test_stack({221, 193}, 0, 2);
6992 test_stack({221, 193}, 1, 3);
6993 test_stack({221, 193}, 2, 4);
6994
6995 test_stack({221, 193}, -1, 2);
6996 test_stack({221, 193}, -2, 3);
6997 test_stack({221, 193}, -3, 4);
6998 }
6999
7000 TEST_F(VulkanAPITest, stack_3d) {
7001 test_stack({221, 193, 11}, 0, 2);
7002 test_stack({221, 193, 11}, 1, 3);
7003 test_stack({221, 193, 11}, 2, 4);
7004 test_stack({221, 193, 11}, 3, 5);
7005
7006 test_stack({221, 193, 11}, -1, 2);
7007 test_stack({221, 193, 11}, -2, 3);
7008 test_stack({221, 193, 11}, -3, 4);
7009 test_stack({221, 193, 11}, -4, 5);
7010 }
7011
7012 TEST_F(VulkanAPITest, tile_invalid_inputs_exceptions) {
7013 // Arrange: Vulkan tile only supports input of dims <= 4
7014 {
7015 const auto in_cpu =
7016 at::rand({3, 9, 5, 7, 3}, at::device(at::kCPU).dtype(at::kFloat));
7017 const at::IntArrayRef repeats = {7, 3, 9, 2};
7018
7019 // Act
7020 EXPECT_THROW(
7021 { const auto out_vulkan = at::tile(in_cpu.vulkan(), repeats); },
7022 ::std::exception);
7023 }
7024 }
7025
7026 TEST_F(VulkanAPITest, tile_invalid_outputs_exceptions) {
7027 // Arrange: Vulkan tile only supports output of dims <= 4
7028 {
7029 const auto in_cpu =
7030 at::rand({3, 9, 5, 13}, at::device(at::kCPU).dtype(at::kFloat));
7031 const at::IntArrayRef repeats = {5, 7, 3, 9, 2};
7032
7033 // Act
7034 EXPECT_THROW(
7035 { const auto out_vulkan = at::tile(in_cpu.vulkan(), repeats); },
7036 ::std::exception);
7037 }
7038 }
7039
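// Helper: for every non-empty prefix of input_shape and every non-empty prefix of
// repeats, tiles a random tensor on the CPU and Vulkan backends and compares the
// results.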
7040 void test_tile(
7041 const at::IntArrayRef input_shape,
7042 const at::IntArrayRef repeats) {
7043 c10::InferenceMode mode;
7044
7045 at::Tensor in_cpu;
7046 at::Tensor out_cpu;
7047 at::Tensor in_vulkan;
7048 at::Tensor out_vulkan;
7049 at::IntArrayRef repeat;
7050 bool check = true;
7051 for (int idx_input = 1; (unsigned)idx_input < input_shape.size() + 1; ++idx_input) {
7052 for (int idx_repeat = 1; (unsigned)idx_repeat < repeats.size() + 1; ++idx_repeat) {
7053 in_cpu = at::rand(
7054 input_shape.slice(0, idx_input),
7055 at::device(at::kCPU).dtype(at::kFloat));
7056 repeat = repeats.slice(0, idx_repeat);
7057 out_cpu = at::tile(in_cpu, repeat);
7058 in_vulkan = in_cpu.vulkan();
7059 out_vulkan = at::tile(in_vulkan, repeat);
7060       const bool check_iter = almostEqual(out_cpu, out_vulkan.cpu());
7061       if (!check_iter) {
7062         check = false;
7063 std::cout << "Tile test failed when input is of shape "
7064 << input_shape.slice(0, idx_input) << " and repeat of "
7065 << repeat << std::endl;
7066 showRtol(out_cpu, out_vulkan.cpu());
7067 }
7068 }
7069 }
7070
7071 ASSERT_TRUE(check);
7072 }
7073
7074 TEST_F(VulkanAPITest, tile) {
7075 test_tile({13, 5, 13, 7}, {7, 2, 3, 5});
7076 }
7077
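// Helper: zero_() is an in-place op, so it is applied to both the CPU tensor and
// its Vulkan copy, and the two results are compared.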
7078 void test_zero_(const at::IntArrayRef input_shape) {
7079 auto cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
7080 auto vulkan = cpu.vulkan();
7081
7082 cpu.zero_();
7083 vulkan.zero_();
7084
7085 const auto check = almostEqual(cpu, vulkan.cpu());
7086 if (!check) {
7087 showRtol(cpu, vulkan.cpu());
7088 std::cout << "zero_ test failed with input shape: "
7089 << input_shape << std::endl;
7090 }
7091 ASSERT_TRUE(check);
7092 }
7093
7094 TEST_F(VulkanAPITest, zero_) {
7095 test_zero_({5});
7096 test_zero_({5, 7});
7097 test_zero_({9, 7, 5});
7098 test_zero_({22, 11, 19, 17});
7099 }
7100
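// Helper: unlike zero_(), at::zeros is a factory function, so the Vulkan tensor is
// created directly on the Vulkan device instead of being copied from the CPU.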
7101 void test_zeros(const at::IntArrayRef input_shape) {
7102 auto cpu = at::zeros(input_shape);
7103 auto vulkan = at::zeros(input_shape, at::device(at::kVulkan));
7104
7105 const auto check = almostEqual(cpu, vulkan.cpu());
7106 if (!check) {
7107 showRtol(cpu, vulkan.cpu());
7108 std::cout << "zeros test failed with input shape: "
7109 << input_shape << std::endl;
7110 }
7111 ASSERT_TRUE(check);
7112 }
7113
7114 TEST_F(VulkanAPITest, zeros) {
7115 test_zeros({5});
7116 test_zeros({5, 7});
7117 test_zeros({9, 7, 5});
7118 test_zeros({22, 11, 19, 17});
7119 }
7120
7121 TEST_F(VulkanAPITest, clone_success) {
7122 // Arrange
7123 std::multimap<std::optional<c10::MemoryFormat>, std::vector<int64_t>> mem2sizes {
7124 {c10::MemoryFormat::Preserve, {2, 3, 5, 161}}, // 4D tensors with MemoryFormat::Preserve
7125 {c10::MemoryFormat::Contiguous, {2, 3, 5, 161}}, // 4D tensors with MemoryFormat::Contiguous
7126 {{}, {2, 3, 5, 161}}, // 4D tensors with null
7127 {c10::MemoryFormat::Preserve, {3, 5, 161}}, // 3D tensors with MemoryFormat::Preserve
7128 {c10::MemoryFormat::Contiguous, {3, 5, 161}}, // 3D tensors with MemoryFormat::Contiguous
7129 {{}, {3, 5, 161}}, // 3D tensors with null
7130 {c10::MemoryFormat::Preserve, {5, 161}}, // 2D tensors with MemoryFormat::Preserve
7131 {c10::MemoryFormat::Contiguous, {5, 161}}, // 2D tensors with MemoryFormat::Contiguous
7132 {{}, {5, 161}}, // 2D tensors with null
7133 {c10::MemoryFormat::Preserve, {161}}, // 1D tensors with MemoryFormat::Preserve
7134 {c10::MemoryFormat::Contiguous, {161}}, // 1D tensors with MemoryFormat::Contiguous
7135 {{}, {161}}, // 1D tensors with null
7136 };
7137
7138 // Act/Assert
7139 for (const auto& mem2size : mem2sizes) {
7140 clone_test(mem2size.second, mem2size.first);
7141 }
7142 }
7143
7144 TEST_F(VulkanAPITest, clone_invalidinputs_exceptions) {
7145   // Act: Vulkan only supports the Preserve and Contiguous memory formats
7146 EXPECT_THROW({
7147 clone_test({2, 3, 5, 161}, c10::MemoryFormat::ChannelsLast);
7148 }, ::std::exception);
7149
7150   // Act: Vulkan only supports the Preserve and Contiguous memory formats
7151 EXPECT_THROW({
7152 clone_test({2, 3, 5, 161}, c10::MemoryFormat::ChannelsLast3d);
7153 }, ::std::exception);
7154 }
7155
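// The classes below form a small op-chaining harness for the mobilenetv2 test:
// each BaseOp subclass wraps a single ATen call with fixed random parameters, and
// OpsList runs a sequence of them on a CPU tensor and its Vulkan counterpart in
// lockstep. The OpType tag is currently unused beyond construction.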
7156 enum class OpType {
7157 addmm,
7158 conv2d,
7159 hardtanh_,
7160 mean,
7161 };
7162
7163 class BaseOp {
7164 public:
7165   explicit BaseOp(const OpType) {}
7166 virtual ~BaseOp() = default;
7167
7168 virtual at::Tensor run(at::Tensor&) const = 0;
7169 virtual std::string toString() const = 0;
7170
7171 };
7172
7173 class Addmm final : public BaseOp {
7174 public:
7175   Addmm(
7176 const int64_t m1H,
7177 const int64_t m1W,
7178 const int64_t m2W,
7179 const float beta,
7180 const float alpha)
7181 : BaseOp(OpType::addmm),
7182 m2_(at::rand(c10::IntArrayRef({m1W, m2W}), at::device(at::kCPU).dtype(at::kFloat))),
7183 b_(at::rand(c10::IntArrayRef({m1H, m2W}), at::device(at::kCPU).dtype(at::kFloat))),
7184 beta_(beta),
7185 alpha_(alpha) {
7186 }
7187
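  // at::addmm computes beta_ * b_ + alpha_ * (t @ m2_); b_ was created with shape
  // {m1H, m2W}, so no broadcasting is involved.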
7188   at::Tensor run(at::Tensor& t) const override {
7189     return at::addmm(b_, t, m2_, beta_, alpha_);
7194 }
7195
7196   std::string toString() const override {
7197 return "addmm";
7198 }
7199
7200 private:
7201 at::Tensor m2_;
7202 at::Tensor b_;
7203 float beta_;
7204 float alpha_;
7205 };
7206
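// Conv2d wraps at::conv2d with random parameters; wsizes is the weight shape
// {out_channels, in_channels / groups, kH, kW} and the bias has out_channels
// elements, so the depthwise blocks below pass groups equal to the channel count
// with a per-group input width of 1.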
7207 class Conv2d final : public BaseOp {
7208 public:
7209   Conv2d(
7210 const c10::IntArrayRef wsizes,
7211 const int64_t groups,
7212 const int64_t stride,
7213 const int64_t padding)
7214 : BaseOp(OpType::conv2d),
7215 groups_(groups),
7216 stride_(stride),
7217 padding_(padding),
7218 w_(at::rand(wsizes, at::device(at::kCPU).dtype(at::kFloat))),
7219 b_(at::rand(wsizes[0], at::device(at::kCPU).dtype(at::kFloat))){
7220 }
7221
7222   at::Tensor run(at::Tensor& t) const override {
7223 return at::conv2d(t, w_, b_, {stride_}, {padding_}, {1}, groups_);
7224 }
7225
7226   std::string toString() const override {
7227 return "conv2d";
7228 }
7229
7230 private:
7231 int64_t groups_;
7232 int64_t stride_;
7233 int64_t padding_;
7234 at::Tensor w_;
7235 at::Tensor b_;
7236 };
7237
7238 class Hardtanh_ final : public BaseOp {
7239 public:
7240   Hardtanh_() : BaseOp(OpType::hardtanh_) {}
7241
7242   at::Tensor run(at::Tensor& input) const override {
7243 return at::hardtanh_(input, 0, 6);
7244 }
7245
7246   std::string toString() const override {
7247 return "hardtanh_";
7248 }
7249 };
7250
7251 class Mean final : public BaseOp {
7252 public:
7253   Mean() : BaseOp(OpType::mean) {}
7254
7255   at::Tensor run(at::Tensor& input) const override {
7256 return at::mean(input, {2, 3}, false);
7257 }
7258
7259   std::string toString() const override {
7260 return "mean";
7261 }
7262 };
7263
7264 class OpsList {
7265 public:
7266   OpsList() {}
7267   explicit OpsList(std::vector<std::unique_ptr<BaseOp>> ops)
7268 : ops_(std::move(ops)) {
7269 }
7270
7271   auto run(const at::Tensor& input) {
7272 at::Tensor output = input;
7273
7274 for (const auto& op : ops_) {
7275 output = op->run(output);
7276 }
7277
7278 return output;
7279 }
7280
7281   auto run(const at::Tensor& input, const at::Tensor& v_input) {
7282 at::Tensor output = input;
7283 at::Tensor v_output = v_input;
7284
7285 for (const auto& op : ops_) {
7286 output = op->run(output);
7287 v_output = op->run(v_output);
7288 }
7289
7290 return std::make_pair(output, v_output);
7291 }
7292
7293 protected:
7294 std::vector<std::unique_ptr<BaseOp>> ops_;
7295 };
7296
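// A linearized MobileNetV2 feature extractor: each inverted-residual block is
// flattened into a 1x1 expansion conv, a depthwise 3x3 conv and a 1x1 projection
// conv, with hardtanh_(0, 6) standing in for ReLU6. The residual additions of the
// original architecture are omitted here; the final Mean and Addmm act as global
// average pooling plus the classifier layer.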
7297 class MobileNetV2 final : public OpsList {
7298 public:
7299   MobileNetV2() {
7300 ops_.emplace_back(new Conv2d({32, 3, 3, 3}, 1, 2, 1));
7301 ops_.emplace_back(new Hardtanh_());
7302 ops_.emplace_back(new Conv2d({32, 1, 3, 3}, 32, 1, 1));
7303 ops_.emplace_back(new Hardtanh_());
7304 ops_.emplace_back(new Conv2d({16, 32, 1, 1}, 1, 1, 0));
7305 ops_.emplace_back(new Conv2d({96, 16, 1, 1}, 1, 1, 0));
7306 ops_.emplace_back(new Hardtanh_());
7307 ops_.emplace_back(new Conv2d({96, 1, 3, 3}, 96, 2, 1));
7308 ops_.emplace_back(new Hardtanh_());
7309 ops_.emplace_back(new Conv2d({24, 96, 1, 1}, 1, 1, 0));
7310 ops_.emplace_back(new Conv2d({144, 24, 1, 1}, 1, 1, 0));
7311 ops_.emplace_back(new Hardtanh_());
7312 ops_.emplace_back(new Conv2d({144, 1, 3, 3}, 144, 1, 1));
7313 ops_.emplace_back(new Hardtanh_());
7314 ops_.emplace_back(new Conv2d({24, 144, 1, 1}, 1, 1, 0));
7315 ops_.emplace_back(new Conv2d({144, 24, 1, 1}, 1, 1, 0));
7316 ops_.emplace_back(new Hardtanh_());
7317 ops_.emplace_back(new Conv2d({144, 1, 3, 3}, 144, 2, 1));
7318 ops_.emplace_back(new Hardtanh_());
7319 ops_.emplace_back(new Conv2d({32, 144, 1, 1}, 1, 1, 0));
7320 ops_.emplace_back(new Conv2d({192, 32, 1, 1}, 1, 1, 0));
7321 ops_.emplace_back(new Hardtanh_());
7322 ops_.emplace_back(new Conv2d({192, 1, 3, 3}, 192, 1, 1));
7323 ops_.emplace_back(new Hardtanh_());
7324 ops_.emplace_back(new Conv2d({32, 192, 1, 1}, 1, 1, 0));
7325 ops_.emplace_back(new Conv2d({192, 32, 1, 1}, 1, 1, 0));
7326 ops_.emplace_back(new Hardtanh_());
7327 ops_.emplace_back(new Conv2d({192, 1, 3, 3}, 192, 1, 1));
7328 ops_.emplace_back(new Hardtanh_());
7329 ops_.emplace_back(new Conv2d({32, 192, 1, 1}, 1, 1, 0));
7330 ops_.emplace_back(new Conv2d({192, 32, 1, 1}, 1, 1, 0));
7331 ops_.emplace_back(new Hardtanh_());
7332 ops_.emplace_back(new Conv2d({192, 1, 3, 3}, 192, 2, 1));
7333 ops_.emplace_back(new Hardtanh_());
7334 ops_.emplace_back(new Conv2d({64, 192, 1, 1}, 1, 1, 0));
7335 ops_.emplace_back(new Conv2d({384, 64, 1, 1}, 1, 1, 0));
7336 ops_.emplace_back(new Hardtanh_());
7337 ops_.emplace_back(new Conv2d({384, 1, 3, 3}, 384, 1, 1));
7338 ops_.emplace_back(new Hardtanh_());
7339 ops_.emplace_back(new Conv2d({64, 384, 1, 1}, 1, 1, 0));
7340 ops_.emplace_back(new Conv2d({384, 64, 1, 1}, 1, 1, 0));
7341 ops_.emplace_back(new Hardtanh_());
7342 ops_.emplace_back(new Conv2d({384, 1, 3, 3}, 384, 1, 1));
7343 ops_.emplace_back(new Hardtanh_());
7344 ops_.emplace_back(new Conv2d({64, 384, 1, 1}, 1, 1, 0));
7345 ops_.emplace_back(new Conv2d({384, 64, 1, 1}, 1, 1, 0));
7346 ops_.emplace_back(new Hardtanh_());
7347 ops_.emplace_back(new Conv2d({384, 1, 3, 3}, 384, 1, 1));
7348 ops_.emplace_back(new Hardtanh_());
7349 ops_.emplace_back(new Conv2d({64, 384, 1, 1}, 1, 1, 0));
7350 ops_.emplace_back(new Conv2d({384, 64, 1, 1}, 1, 1, 0));
7351 ops_.emplace_back(new Hardtanh_());
7352 ops_.emplace_back(new Conv2d({384, 1, 3, 3}, 384, 1, 1));
7353 ops_.emplace_back(new Hardtanh_());
7354 ops_.emplace_back(new Conv2d({96, 384, 1, 1}, 1, 1, 0));
7355 ops_.emplace_back(new Conv2d({576, 96, 1, 1}, 1, 1, 0));
7356 ops_.emplace_back(new Hardtanh_());
7357 ops_.emplace_back(new Conv2d({576, 1, 3, 3}, 576, 1, 1));
7358 ops_.emplace_back(new Hardtanh_());
7359 ops_.emplace_back(new Conv2d({96, 576, 1, 1}, 1, 1, 0));
7360 ops_.emplace_back(new Conv2d({576, 96, 1, 1}, 1, 1, 0));
7361 ops_.emplace_back(new Hardtanh_());
7362 ops_.emplace_back(new Conv2d({576, 1, 3, 3}, 576, 1, 1));
7363 ops_.emplace_back(new Hardtanh_());
7364 ops_.emplace_back(new Conv2d({96, 576, 1, 1}, 1, 1, 0));
7365 ops_.emplace_back(new Conv2d({576, 96, 1, 1}, 1, 1, 0));
7366 ops_.emplace_back(new Hardtanh_());
7367 ops_.emplace_back(new Conv2d({576, 1, 3, 3}, 576, 2, 1));
7368 ops_.emplace_back(new Hardtanh_());
7369 ops_.emplace_back(new Conv2d({160, 576, 1, 1}, 1, 1, 0));
7370 ops_.emplace_back(new Conv2d({960, 160, 1, 1}, 1, 1, 0));
7371 ops_.emplace_back(new Hardtanh_());
7372 ops_.emplace_back(new Conv2d({960, 1, 3, 3}, 960, 1, 1));
7373 ops_.emplace_back(new Hardtanh_());
7374 ops_.emplace_back(new Conv2d({160, 960, 1, 1}, 1, 1, 0));
7375 ops_.emplace_back(new Conv2d({960, 160, 1, 1}, 1, 1, 0));
7376 ops_.emplace_back(new Hardtanh_());
7377 ops_.emplace_back(new Conv2d({960, 1, 3, 3}, 960, 1, 1));
7378 ops_.emplace_back(new Hardtanh_());
7379 ops_.emplace_back(new Conv2d({160, 960, 1, 1}, 1, 1, 0));
7380 ops_.emplace_back(new Conv2d({960, 160, 1, 1}, 1, 1, 0));
7381 ops_.emplace_back(new Hardtanh_());
7382 ops_.emplace_back(new Conv2d({960, 1, 3, 3}, 960, 1, 1));
7383 ops_.emplace_back(new Hardtanh_());
7384 ops_.emplace_back(new Conv2d({320, 960, 1, 1}, 1, 1, 0));
7385 ops_.emplace_back(new Conv2d({1280, 320, 1, 1}, 1, 1, 0));
7386 ops_.emplace_back(new Hardtanh_());
7387 ops_.emplace_back(new Mean());
7388 ops_.emplace_back(new Addmm(1, 1280, 1000, 0, 1));
7389 }
7390 };
7391
7392 TEST_F(VulkanAPITest, mobilenetv2) {
7393 c10::InferenceMode mode;
7394
7395 MobileNetV2 mn2;
7396
7397 const auto input = at::rand({1, 3, 224, 224}, at::device(at::kCPU).dtype(at::kFloat));
7398 const auto output = mn2.run(input, input.vulkan());
7399
7400 const auto check = almostEqual(output.first, output.second.cpu());
7401 if (!check) {
7402 showRtol(output.first, output.second.cpu());
7403 }
7404
7405 ASSERT_TRUE(check);
7406 }
7407
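// The flat parameter list passed to at::gru below is ordered per layer as
// { weight_ih, weight_hh, bias_ih, bias_hh }, matching torch.nn.GRU's flat-weights
// layout. The Vulkan GRU path also requires has_biases == true, train == false,
// bidirectional == false, batch_first == true and dropout == 0.0, as
// gru_invalidinputs_exceptions below verifies.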
7408 TEST_F(VulkanAPITest, gru_success) {
7409 // Arrange
7410 const int H_in = 5; // input_size
7411 const int H_out = 7; // hidden_size
7412 const int num_layers = 3;
7413 const int L = 1;
7414 const int N = 1;
7415 const double gru_dropout = .0;
7416 const bool has_biases = true;
7417 const bool train = false;
7418 const bool bidirectional = false;
7419 const bool batch_first = true;
7420 const auto in_cpu = at::rand({N, L, H_in}, at::device(at::kCPU).dtype(at::kFloat));
7421 const auto h0_cpu = at::rand({num_layers, N, H_out}, at::device(at::kCPU).dtype(at::kFloat));
7422
7423 c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
7424 c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
7425 c10::List<at::Tensor> bias_ih_l; // shape (3 * hidden_size)
7426 c10::List<at::Tensor> bias_hh_l; // shape (3 * hidden_size)
7427 for (int i = 0; i < num_layers; ++i) {
7428 if (i == 0) {
7429 weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
7430 } else {
7431 weight_ih_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7432 }
7433 weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7434 bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7435 bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7436 }
7437
7438   // put this guard here to run inference instead of training
7439 // to avoid the following error:
7440 // C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
7441 // If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
7442 c10::InferenceMode mode;
7443
7444 // Act
7445 const auto out_cpu = at::gru(in_cpu, h0_cpu,
7446 { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0],
7447 weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1],
7448 weight_ih_l[2], weight_hh_l[2], bias_ih_l[2], bias_hh_l[2] },
7449 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7450
7451   // weights/biases should always be on CPU.
7452 const auto out_vulkan = at::gru(in_cpu.vulkan(), h0_cpu.vulkan(),
7453 { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7454 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1),
7455 weight_ih_l.get(2), weight_hh_l.get(2), bias_ih_l.get(2), bias_hh_l.get(2) },
7456 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7457
7458 auto cpu_output = std::get<0>(out_cpu);
7459 auto cpu_hidden = std::get<1>(out_cpu);
7460 auto vulkan_output = std::get<0>(out_vulkan);
7461 auto vulkan_hidden = std::get<1>(out_vulkan);
7462
7463 // Assert
7464 const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
7465 if (!check_output) {
7466 showRtol(cpu_output, vulkan_output.cpu());
7467 }
7468 ASSERT_TRUE(check_output);
7469
7470 const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
7471 if (!check_hidden) {
7472 showRtol(cpu_hidden, vulkan_hidden.cpu());
7473 }
7474 ASSERT_TRUE(check_hidden);
7475 }
7476
7477 TEST_F(VulkanAPITest, gru_mclareninputs_success) {
7478 // Arrange
7479 const int H_in = 384; // input_size
7480 const int H_out = 384; // hidden_size
7481 const int num_layers = 2;
7482 const int L = 1;
7483 const int N = 1;
7484 const double gru_dropout = .0;
7485 const bool has_biases = true;
7486 const bool train = false;
7487 const bool bidirectional = false;
7488 const bool batch_first = true;
7489 const auto in_cpu = at::rand({N, L, H_in}, at::device(at::kCPU).dtype(at::kFloat));
7490 const auto h0_cpu = at::rand({num_layers, N, H_out}, at::device(at::kCPU).dtype(at::kFloat));
7491
7492 c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
7493 c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
7494 c10::List<at::Tensor> bias_ih_l; // shape (3 * hidden_size)
7495 c10::List<at::Tensor> bias_hh_l; // shape (3 * hidden_size)
7496 for (int i = 0; i < num_layers; ++i) {
7497 if (i == 0) {
7498 weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
7499 } else {
7500 weight_ih_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7501 }
7502 weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7503 bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7504 bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7505 }
7506
7507   // put this guard here to run inference instead of training
7508 // to avoid the following error:
7509 // C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
7510 // If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
7511 c10::InferenceMode mode;
7512
7513 // Act
7514 const auto out_cpu = at::gru(in_cpu, h0_cpu,
7515 { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0], weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1] },
7516 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7517
7518   // weights/biases should always be on CPU.
7519 const auto out_vulkan = at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7520 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7521 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7522
7523 auto cpu_output = std::get<0>(out_cpu);
7524 auto cpu_hidden = std::get<1>(out_cpu);
7525 auto vulkan_output = std::get<0>(out_vulkan);
7526 auto vulkan_hidden = std::get<1>(out_vulkan);
7527
7528 // Assert
7529 const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
7530 if (!check_output) {
7531 showRtol(cpu_output, vulkan_output.cpu());
7532 }
7533 ASSERT_TRUE(check_output);
7534
7535 const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
7536 if (!check_hidden) {
7537 showRtol(cpu_hidden, vulkan_hidden.cpu());
7538 }
7539 ASSERT_TRUE(check_hidden);
7540 }
7541
7542 TEST_F(VulkanAPITest, gru_invalidinputs_exceptions) {
7543 // Arrange
7544 const int H_in = 17; // input_size
7545 const int H_out = 50; // hidden_size
7546 const int num_layers = 2;
7547 const int L = 5;
7548 const int N = 4;
7549 const double gru_dropout = .0;
7550 const bool has_biases = true;
7551 const bool train = false;
7552 const bool bidirectional = false;
7553 const bool batch_first = true;
7554 const auto in_cpu = at::rand({N, L, H_in}, at::device(at::kCPU).dtype(at::kFloat));
7555 const auto h0_cpu = at::rand({num_layers, N, H_out}, at::device(at::kCPU).dtype(at::kFloat));
7556
7557 c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
7558 c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
7559 c10::List<at::Tensor> bias_ih_l; // shape (3 * hidden_size)
7560 c10::List<at::Tensor> bias_hh_l; // shape (3 * hidden_size)
7561 for (int i = 0; i < num_layers; ++i) {
7562 if (i == 0) {
7563 weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
7564 } else {
7565 weight_ih_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7566 }
7567 weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7568 bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7569 bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7570 }
7571
7572   // put this guard here to run inference instead of training
7573 // to avoid the following error:
7574 // C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
7575 // If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
7576 c10::InferenceMode mode;
7577
7578 // Act: incorrect # of weights/biases
7579 EXPECT_THROW({
7580 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7581 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1) },
7582 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7583 }, ::std::exception);
7584
7585 // Act: non-3D input tensor
7586 EXPECT_THROW({
7587 const auto in_cpu_2d = at::rand({1, H_in}, at::device(at::kCPU).dtype(at::kFloat));
7588 at::gru(in_cpu_2d.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7589 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7590 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7591 }, ::std::exception);
7592
7593 // Act: non-3D hidden tensor
7594 EXPECT_THROW({
7595 const auto h0_cpu_2d = at::rand({num_layers, H_out}, at::device(at::kCPU).dtype(at::kFloat));
7596 at::gru(in_cpu.vulkan(), h0_cpu_2d.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7597 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7598 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7599 }, ::std::exception);
7600
7601 // Act: has_biases should be true
7602 EXPECT_THROW({
7603 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7604 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7605 false, num_layers, gru_dropout, train, bidirectional, batch_first);
7606 }, ::std::exception);
7607
7608 // Act: train should be false
7609 EXPECT_THROW({
7610 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7611 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7612 has_biases, num_layers, gru_dropout, true, bidirectional, batch_first);
7613 }, ::std::exception);
7614
7615 // Act: bidirectional should be false
7616 EXPECT_THROW({
7617 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7618 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7619 has_biases, num_layers, gru_dropout, train, true, batch_first);
7620 }, ::std::exception);
7621
7622 // Act: batch_first should be true
7623 EXPECT_THROW({
7624 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7625 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7626 has_biases, num_layers, gru_dropout, train, bidirectional, false);
7627 }, ::std::exception);
7628
7629 // Act: dropout should be 0.0
7630 EXPECT_THROW({
7631 at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
7632 weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
7633 has_biases, num_layers, 1.0, train, bidirectional, batch_first);
7634 }, ::std::exception);
7635 }
7636
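// The prepack variant packs the weights/biases once through the custom op
// vulkan_prepack::create_gru_context and then runs the packed context with
// vulkan_prepack::run_gru_context; callOpByName (a helper defined earlier in this
// file) is assumed to look these ops up in the dispatcher by name.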
7637 TEST_F(VulkanAPITest, gru_prepack_success) {
7638 // Arrange
7639 const int H_in = 81; // input_size
7640 const int H_out = 10; // hidden_size
7641 const int num_layers = 2;
7642 const int L = 1;
7643 const int N = 1;
7644 const double gru_dropout = .0;
7645 const bool has_biases = true;
7646 const bool train = false;
7647 const bool bidirectional = false;
7648 const bool batch_first = true;
7649 const auto in_cpu = at::rand({N, L, H_in}, at::device(at::kCPU).dtype(at::kFloat));
7650 const auto h0_cpu = at::rand({num_layers, N, H_out}, at::device(at::kCPU).dtype(at::kFloat));
7651
7652 c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
7653 c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
7654 c10::List<at::Tensor> bias_ih_l; // shape (3 * hidden_size)
7655 c10::List<at::Tensor> bias_hh_l; // shape (3 * hidden_size)
7656 for (int i = 0; i < num_layers; ++i) {
7657 if (i == 0) {
7658 weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
7659 } else {
7660 weight_ih_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7661 }
7662 weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7663 bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7664 bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
7665 }
7666
7667   // put this guard here to run inference instead of training
7668 // to avoid the following error:
7669 // C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
7670 // If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
7671 c10::InferenceMode mode;
7672
7673 // Act
7674 const auto out_cpu = at::gru(in_cpu, h0_cpu,
7675 { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0], weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1] },
7676 has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
7677
  auto prepack = callOpByName(
      "vulkan_prepack::create_gru_context",
      "",
      std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
        weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
      has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
  auto out_vulkan = callOpByName(
      "vulkan_prepack::run_gru_context",
      "",
      in_cpu.vulkan(), h0_cpu.vulkan(), prepack[0]);

  auto cpu_output = std::get<0>(out_cpu);
  auto cpu_hidden = std::get<1>(out_cpu);
  auto vulkan_output = out_vulkan[0].toTensor();
  auto vulkan_hidden = out_vulkan[1].toTensor();

  // Assert
  const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
  if (!check_output) {
    showRtol(cpu_output, vulkan_output.cpu());
  }
  ASSERT_TRUE(check_output);

  const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
  if (!check_hidden) {
    showRtol(cpu_hidden, vulkan_hidden.cpu());
  }
  ASSERT_TRUE(check_hidden);
}

TEST_F(VulkanAPITest, gru_prepack_invalidinputs_exceptions) {
  // Arrange
  const int H_in = 70; // input_size
  const int H_out = 2; // hidden_size
  const int num_layers = 2;
  const int L = 3;
  const int N = 5;
  const double gru_dropout = .0;
  const bool has_biases = true;
  const bool train = false;
  const bool bidirectional = false;
  const bool batch_first = true;
  const auto in_cpu = at::rand({N, L, H_in}, at::device(at::kCPU).dtype(at::kFloat));
  const auto h0_cpu = at::rand({num_layers, N, H_out}, at::device(at::kCPU).dtype(at::kFloat));

  c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
  c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
  c10::List<at::Tensor> bias_ih_l;   // shape (3 * hidden_size)
  c10::List<at::Tensor> bias_hh_l;   // shape (3 * hidden_size)
  for (int i = 0; i < num_layers; ++i) {
    if (i == 0) {
      weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
    } else {
      weight_ih_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
    }
    weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
  }

  // put this guard here to run inference instead of training
  // to avoid the following error:
  //   C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
  //   If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
  c10::InferenceMode mode;

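  // Each case below violates one precondition enforced by
  // create_gru_context / run_gru_context and is expected to throw.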
  // Act: incorrect # of weights/biases
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1) }),
        has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
  }, ::std::exception);

  // Act: non-3D input tensor
  EXPECT_THROW({
    const auto in_cpu_2d = at::rand({1, H_in}, at::device(at::kCPU).dtype(at::kFloat));
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
    auto out_vulkan = callOpByName(
        "vulkan_prepack::run_gru_context",
        "",
        in_cpu_2d.vulkan(), h0_cpu.vulkan(), prepack[0]);
  }, ::std::exception);

  // Act: non-3D hidden tensor
  EXPECT_THROW({
    const auto h0_cpu_2d = at::rand({num_layers, H_out}, at::device(at::kCPU).dtype(at::kFloat));
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);
    auto out_vulkan = callOpByName(
        "vulkan_prepack::run_gru_context",
        "",
        in_cpu.vulkan(), h0_cpu_2d.vulkan(), prepack[0]);
  }, ::std::exception);

  // Act: has_biases should be true
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        false, num_layers, gru_dropout, train, bidirectional, batch_first);
  }, ::std::exception);

  // Act: train should be false
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, gru_dropout, true, bidirectional, batch_first);
  }, ::std::exception);

  // Act: bidirectional should be false
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, gru_dropout, train, true, batch_first);
  }, ::std::exception);

  // Act: batch_first should be true
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, gru_dropout, train, bidirectional, false);
    auto out_vulkan = callOpByName(
        "vulkan_prepack::run_gru_context",
        "",
        in_cpu.vulkan(), h0_cpu.vulkan(), prepack[0]);
  }, ::std::exception);

  // Act: dropout should be 0.0
  EXPECT_THROW({
    auto prepack = callOpByName(
        "vulkan_prepack::create_gru_context",
        "",
        std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
          weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
        has_biases, num_layers, 1.0, train, bidirectional, batch_first);
  }, ::std::exception);
}

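// Helper: runs at::linear on CPU and via the prepacked Vulkan linear context
// for the given input/weight/bias shapes, then checks that the two results
// match within tolerance.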
void test_linear(
    const at::IntArrayRef input_shape,
    const at::IntArrayRef weight_shape,
    const at::IntArrayRef bias_shape) {
  c10::InferenceMode mode;

  const auto input_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto weight = at::rand(weight_shape, at::device(at::kCPU).dtype(at::kFloat));
  const auto bias = at::rand(bias_shape, at::device(at::kCPU).dtype(at::kFloat));

  const auto out_cpu = at::linear(input_cpu, weight, bias);

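  // The Vulkan linear context is created from the transposed weight
  // (weight.t()), while at::linear above consumes the untransposed
  // (out_features, in_features) weight directly.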
  auto prepack = callOpByName(
      "vulkan_prepack::create_linear_context",
      "",
      weight.t(), bias);

  auto vulkan_output = callOpByName(
      "vulkan_prepack::run_linear_context",
      "",
      input_cpu.vulkan(), prepack[0]);

  auto out_vulkan = vulkan_output[0].toTensor();

  const auto check = almostEqual(out_cpu, out_vulkan.cpu());
  if (!check) {
    showRtol(out_cpu, out_vulkan.cpu());
  }

  ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, linear_1d_small) {
  test_linear({3}, {4, 3}, {4});
}

TEST_F(VulkanAPITest, linear_1d_large) {
  test_linear({37}, {23, 37}, {23});
}

TEST_F(VulkanAPITest, linear_2d_flat) {
  test_linear({1, 37}, {41, 37}, {41});
}

TEST_F(VulkanAPITest, linear_2d_small) {
  test_linear({2, 3}, {4, 3}, {4});
}

TEST_F(VulkanAPITest, linear_2d_large) {
  test_linear({49, 37}, {23, 37}, {23});
}

TEST_F(VulkanAPITest, linear_3d_flat) {
  test_linear({1, 1, 37}, {41, 37}, {41});
}

TEST_F(VulkanAPITest, linear_3d_small) {
  test_linear({2, 3, 4}, {5, 4}, {5});
}

TEST_F(VulkanAPITest, linear_3d_large) {
  test_linear({23, 17, 41}, {15, 41}, {15});
}

TEST_F(VulkanAPITest, linear_4d_flat) {
  test_linear({1, 1, 1, 37}, {41, 37}, {41});
}

TEST_F(VulkanAPITest, linear_4d_small) {
  test_linear({2, 3, 4, 5}, {6, 5}, {6});
}

TEST_F(VulkanAPITest, linear_4d_large) {
  test_linear({9, 13, 11, 17}, {23, 17}, {23});
}

TEST_F(VulkanAPITest, lstm_success) {
  // Arrange
  const int input_size = 5;
  const int hidden_size = 7;
  const int num_layers = 4;
  const int L = 1;
  const int N = 1;
  const double lstm_dropout = .0;
  const bool has_biases = true;
  const bool train = false;
  const bool bidirectional = false;
  const bool batch_first = true;
  const auto in_cpu = at::rand({N, L, input_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto h0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto c0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));

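  // Note: each per-layer weight stacks the four LSTM gates (input, forget,
  // cell, output) along dim 0, which is why the leading dimension is
  // 4 * hidden_size.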
  c10::List<at::Tensor> weight_ih_l; // shape (4 * hidden_size, input_size)
  c10::List<at::Tensor> weight_hh_l; // shape (4 * hidden_size, hidden_size)
  c10::List<at::Tensor> bias_ih_l;   // shape (4 * hidden_size)
  c10::List<at::Tensor> bias_hh_l;   // shape (4 * hidden_size)
  for (int l = 0; l < num_layers; ++l) {
    if (l == 0) {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, input_size}, at::device(at::kCPU).dtype(at::kFloat)));
    } else {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    }
    weight_hh_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_ih_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_hh_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
  }

  // put this guard here to run inference instead of training
  // to avoid the following error:
  //   C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
  //   If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
  c10::InferenceMode mode;

  // Act
  const auto out_cpu = at::lstm(in_cpu, {h0_cpu, c0_cpu},
    { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0],
      weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1],
      weight_ih_l[2], weight_hh_l[2], bias_ih_l[2], bias_hh_l[2],
      weight_ih_l[3], weight_hh_l[3], bias_ih_l[3], bias_hh_l[3] },
    has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

  // weights/biases should always be on CPU.
  const auto out_vulkan = at::lstm(in_cpu.vulkan(), {h0_cpu.vulkan(), c0_cpu.vulkan()},
    { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
      weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1),
      weight_ih_l.get(2), weight_hh_l.get(2), bias_ih_l.get(2), bias_hh_l.get(2),
      weight_ih_l.get(3), weight_hh_l.get(3), bias_ih_l.get(3), bias_hh_l.get(3) },
    has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

  auto cpu_output = std::get<0>(out_cpu);
  auto cpu_hidden = std::get<1>(out_cpu);
  auto cpu_cell = std::get<2>(out_cpu);
  auto vulkan_output = std::get<0>(out_vulkan);
  auto vulkan_hidden = std::get<1>(out_vulkan);
  auto vulkan_cell = std::get<2>(out_vulkan);

  // Assert
  const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
  if (!check_output) {
    showRtol(cpu_output, vulkan_output.cpu());
  }
  ASSERT_TRUE(check_output);

  const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
  if (!check_hidden) {
    showRtol(cpu_hidden, vulkan_hidden.cpu());
  }
  ASSERT_TRUE(check_hidden);

  const auto check_cell = almostEqual(cpu_cell, vulkan_cell.cpu());
  if (!check_cell) {
    showRtol(cpu_cell, vulkan_cell.cpu());
  }
  ASSERT_TRUE(check_cell);
}

TEST_F(VulkanAPITest, lstm_mclareninputs_success) {
  // Arrange
  const int input_size = 384;
  const int hidden_size = 384;
  const int num_layers = 2;
  const int L = 1;
  const int N = 1;
  const double lstm_dropout = .0;
  const bool has_biases = true;
  const bool train = false;
  const bool bidirectional = false;
  const bool batch_first = true;
  const auto in_cpu = at::rand({N, L, input_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto h0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto c0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));

  c10::List<at::Tensor> weight_ih_l; // shape (4 * hidden_size, input_size)
  c10::List<at::Tensor> weight_hh_l; // shape (4 * hidden_size, hidden_size)
  c10::List<at::Tensor> bias_ih_l;   // shape (4 * hidden_size)
  c10::List<at::Tensor> bias_hh_l;   // shape (4 * hidden_size)
  for (int l = 0; l < num_layers; ++l) {
    if (l == 0) {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, input_size}, at::device(at::kCPU).dtype(at::kFloat)));
    } else {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    }
    weight_hh_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_ih_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_hh_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
  }

  // put this guard here to run inference instead of training
  // to avoid the following error:
  //   C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
  //   If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
  c10::InferenceMode mode;

  // Act
  const auto out_cpu = at::lstm(in_cpu, {h0_cpu, c0_cpu},
    { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0],
      weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1] },
    has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

  // weights/biases should always be on CPU.
  const auto out_vulkan = at::lstm(in_cpu.vulkan(), {h0_cpu.vulkan(), c0_cpu.vulkan()},
    { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
      weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
    has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

  auto cpu_output = std::get<0>(out_cpu);
  auto cpu_hidden = std::get<1>(out_cpu);
  auto cpu_cell = std::get<2>(out_cpu);
  auto vulkan_output = std::get<0>(out_vulkan);
  auto vulkan_hidden = std::get<1>(out_vulkan);
  auto vulkan_cell = std::get<2>(out_vulkan);

  // Assert
  const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
  if (!check_output) {
    showRtol(cpu_output, vulkan_output.cpu());
  }
  ASSERT_TRUE(check_output);

  const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
  if (!check_hidden) {
    showRtol(cpu_hidden, vulkan_hidden.cpu());
  }
  ASSERT_TRUE(check_hidden);

  const auto check_cell = almostEqual(cpu_cell, vulkan_cell.cpu());
  if (!check_cell) {
    showRtol(cpu_cell, vulkan_cell.cpu());
  }
  ASSERT_TRUE(check_cell);
}

TEST_F(VulkanAPITest, lstm_prepack_success) {
  // Arrange
  const int input_size = 81;
  const int hidden_size = 10;
  const int num_layers = 2;
  const int L = 1;
  const int N = 1;
  const double lstm_dropout = .0;
  const bool has_biases = true;
  const bool train = false;
  const bool bidirectional = false;
  const bool batch_first = true;
  const auto in_cpu = at::rand({N, L, input_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto h0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));
  const auto c0_cpu = at::rand({num_layers, N, hidden_size}, at::device(at::kCPU).dtype(at::kFloat));

  c10::List<at::Tensor> weight_ih_l; // shape (4 * hidden_size, l == 0 ? input_size : hidden_size)
  c10::List<at::Tensor> weight_hh_l; // shape (4 * hidden_size, hidden_size)
  c10::List<at::Tensor> bias_ih_l;   // shape (4 * hidden_size)
  c10::List<at::Tensor> bias_hh_l;   // shape (4 * hidden_size)
  for (int l = 0; l < num_layers; ++l) {
    if (l == 0) {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, input_size}, at::device(at::kCPU).dtype(at::kFloat)));
    } else {
      weight_ih_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    }
    weight_hh_l.emplace_back(at::rand({4 * hidden_size, hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_ih_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_hh_l.emplace_back(at::rand({4 * hidden_size}, at::device(at::kCPU).dtype(at::kFloat)));
  }

  // put this guard here to run inference instead of training
  // to avoid the following error:
  //   C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
  //   If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
  c10::InferenceMode mode;

  // Act
  const auto out_cpu = at::lstm(in_cpu, {h0_cpu, c0_cpu},
    { weight_ih_l[0], weight_hh_l[0], bias_ih_l[0], bias_hh_l[0],
      weight_ih_l[1], weight_hh_l[1], bias_ih_l[1], bias_hh_l[1] },
    has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

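  // Prepack path: create_lstm_context packs the CPU weights/biases into a
  // reusable Vulkan context once; run_lstm_context then executes the LSTM
  // against that context with Vulkan input, hidden, and cell tensors.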
  auto prepack = callOpByName(
      "vulkan_prepack::create_lstm_context",
      "",
      std::vector<at::Tensor>({ weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
        weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) }),
      has_biases, num_layers, lstm_dropout, train, bidirectional, batch_first);

  auto out_vulkan = callOpByName(
      "vulkan_prepack::run_lstm_context",
      "",
      in_cpu.vulkan(), h0_cpu.vulkan(), c0_cpu.vulkan(), prepack[0]);

  auto cpu_output = std::get<0>(out_cpu);
  auto cpu_hidden = std::get<1>(out_cpu);
  auto cpu_cell = std::get<2>(out_cpu);
  auto vulkan_output = out_vulkan[0].toTensor();
  auto vulkan_hidden = out_vulkan[1].toTensor();
  auto vulkan_cell = out_vulkan[2].toTensor();

  // Assert
  const auto check_output = almostEqual(cpu_output, vulkan_output.cpu());
  if (!check_output) {
    showRtol(cpu_output, vulkan_output.cpu());
  }
  ASSERT_TRUE(check_output);

  const auto check_hidden = almostEqual(cpu_hidden, vulkan_hidden.cpu());
  if (!check_hidden) {
    showRtol(cpu_hidden, vulkan_hidden.cpu());
  }
  ASSERT_TRUE(check_hidden);

  const auto check_cell = almostEqual(cpu_cell, vulkan_cell.cpu());
  if (!check_cell) {
    showRtol(cpu_cell, vulkan_cell.cpu());
  }
  ASSERT_TRUE(check_cell);
}

TEST_F(VulkanAPITest, querypool_flushed_shader_log) {
#if defined(USE_VULKAN_GPU_DIAGNOSTICS) && defined(__ANDROID__)
  const bool op_profiling_enabled_initially =
      at::native::vulkan::api::context()->op_profiling_enabled();

  at::native::vulkan::api::context()->enable_op_profiling();

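  // With op profiling enabled, every shader dispatch is recorded in the
  // context's query pool; the extract_results()/reset_querypool() calls
  // below flush and clear that log between the add, sub, and mul sections.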
  const at::Tensor a_add_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor a_add_vulkan = a_add_cpu.vulkan();

  const at::Tensor b_add_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor b_add_vulkan = b_add_cpu.vulkan();

  at::add(a_add_vulkan, b_add_vulkan, 2.1f).cpu();

  at::native::vulkan::api::context()->querypool().extract_results();
  at::native::vulkan::api::context()->reset_querypool();

  const at::Tensor a_sub_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor a_sub_vulkan = a_sub_cpu.vulkan();

  const at::Tensor b_sub_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor b_sub_vulkan = b_sub_cpu.vulkan();

  at::sub(a_sub_vulkan, b_sub_vulkan, 2.1f).cpu();

  at::native::vulkan::api::context()->querypool().extract_results();
  at::native::vulkan::api::context()->reset_querypool();

  const at::Tensor a_mul_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor a_mul_vulkan = a_mul_cpu.vulkan();

  const at::Tensor b_mul_cpu =
      at::rand({11, 7, 139, 109}, at::device(at::kCPU).dtype(at::kFloat));
  const at::Tensor b_mul_vulkan = b_mul_cpu.vulkan();

  at::mul(a_mul_vulkan, b_mul_vulkan).cpu();

  /*
    The most recent shaders should be
    (-12) vulkan.nchw_to_image
    (-11) vulkan.nchw_to_image
    (-10) vulkan.add
    (-9)  vulkan.image_to_nchw

    (-8)  vulkan.nchw_to_image
    (-7)  vulkan.nchw_to_image
    (-6)  vulkan.sub
    (-5)  vulkan.image_to_nchw

    (-4)  vulkan.nchw_to_image
    (-3)  vulkan.nchw_to_image
    (-2)  vulkan.mul
    (-1)  vulkan.image_to_nchw
  */

  const size_t entry_count =
      at::native::vulkan::api::context()->querypool().shader_logs_entry_count();

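  // Each binary op above contributes four log entries (two uploads, the
  // compute shader, one download), so the add/sub/mul compute shaders sit
  // at offsets -10, -6, and -2 from the end of the shader log.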
  std::tuple<std::string, uint64_t> add_shader_details =
      at::native::vulkan::api::context()
          ->querypool()
          .get_shader_name_and_execution_duration_ns(entry_count - 10);
  std::tuple<std::string, uint64_t> sub_shader_details =
      at::native::vulkan::api::context()
          ->querypool()
          .get_shader_name_and_execution_duration_ns(entry_count - 6);
  std::tuple<std::string, uint64_t> mul_shader_details =
      at::native::vulkan::api::context()
          ->querypool()
          .get_shader_name_and_execution_duration_ns(entry_count - 2);

  EXPECT_EQ(std::get<0>(add_shader_details), "vulkan.add");
  EXPECT_EQ(std::get<0>(sub_shader_details), "vulkan.sub");
  EXPECT_EQ(std::get<0>(mul_shader_details), "vulkan.mul");

  if (!op_profiling_enabled_initially) {
    at::native::vulkan::api::context()->reset_querypool();
    at::native::vulkan::api::context()->disable_op_profiling();
  }
#else
  GTEST_SKIP() << "QueryPool is not available";
#endif
}

} // namespace

#endif /* USE_VULKAN_API */