// aten/src/ATen/native/vulkan/ops/BinaryOp.cpp
// (AOSP xref: /aosp_15_r20/external/pytorch, revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#ifdef USE_VULKAN_API
#include <ATen/ArrayRef.h>
#include <ATen/native/vulkan/ops/Common.h>
#include <ATen/native/vulkan/ops/QuantizedFunctions.h>
#include <ATen/native/vulkan/ops/Utils.h>
#include <torch/library.h>

namespace at {
namespace native {
namespace vulkan {
namespace ops {

using namespace api::utils;

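// Dispatches a scalar binary op shader over a Vulkan tensor. The scalar
// operand (pre-multiplied by alpha, if provided) is passed to the shader in a
// uniform block alongside the output extents, and the result is written into
// a freshly allocated vTensor of the same shape and dtype as the input.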
static Tensor binary_op_scalar(
    const Tensor& self_arg,
    const Scalar& other,
    const std::optional<Scalar>& alpha_arg,
    const api::ShaderInfo& shader_descriptor) {
  api::Context* const context = api::context();

  const Tensor self = self_arg.is_vulkan() ? self_arg : self_arg.vulkan();
  const vTensor& v_self = convert(self);

  vTensor v_output{
      context,
      v_self.sizes(),
      v_self.dtype(),
  };

  const float other_val = alpha_arg ? other.to<float>() * alpha_arg->to<float>()
                                    : other.to<float>();
  const struct Block final {
    uvec3 extents;
    int fill0;
    float other;
  } block{
      v_self.extents(),
      0,
      other_val,
  };

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      shader_descriptor,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_output.extents(),
      // local work group size
      adaptive_work_group_size(v_output.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_output.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      v_self.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return convert(v_output);
}

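// Prepares the `other` operand of a tensor-tensor binary op: maps supported
// integer/double CPU tensors to float and moves the result to Vulkan.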
static Tensor binary_op_preprocess_other_arg(const Tensor& other_arg) {
  // Similar to binary_op_scalar, where the scalar is mapped to float, we also
  // map tensors of known integer types (but not quantized types) to float.

  // Such a conversion can only be done before moving to Vulkan, since Vulkan
  // doesn't yet support integer types.
  Tensor other = other_arg;
  if (!other.is_vulkan()) {
    switch (other.scalar_type()) {
      case at::kByte:
      case at::kChar:
      case at::kShort:
      case at::kInt:
      case at::kLong:
      case at::kDouble:
        other = other.to(kFloat);
        break;
      case at::kFloat:
        // No op for expected type.
        break;
      default:
        TORCH_CHECK(
            false,
            "binary_op_tensor doesn't support type ",
            other.scalar_type());
        break;
    }
    other = other.vulkan();
  }

  return other;
}

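// In-place variant of binary_op_scalar: requires `self_arg` to already be a
// Vulkan tensor and dispatches the shader with read/write access to its image.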
static Tensor& binary_op_scalar_(
    Tensor& self_arg,
    const Scalar& other,
    const std::optional<Scalar>& alpha_arg,
    const api::ShaderInfo& shader_descriptor) {
  TORCH_CHECK(
      self_arg.is_vulkan(),
      "Vulkan: In-place operator is only supported on Vulkan tensors.");

  api::Context* const context = api::context();

  vTensor& v_self = convert(self_arg);

  const float other_val = alpha_arg ? other.to<float>() * alpha_arg->to<float>()
                                    : other.to<float>();
  const struct Block final {
    uvec3 extents;
    int fill0;
    float other;
  } block{
      v_self.extents(),
      0,
      other_val,
  };

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      shader_descriptor,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_self.extents(),
      // local work group size
      adaptive_work_group_size(v_self.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_self.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::READ | api::MemoryAccessType::WRITE),
      // params buffer
      params.buffer());

  return self_arg;
}

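// Dispatches a tensor-tensor binary op shader with broadcasting support. The
// uniform block carries the WHCN sizes of the output and both inputs plus the
// alpha scale; the result is written to a new vTensor of the broadcast shape.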
static Tensor binary_op_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const std::optional<Scalar>& alpha_arg,
    const api::ShaderInfo& shader_descriptor) {
  utils::is_broadcastable(self_arg, other_arg);
  api::Context* const context = api::context();

  const Tensor self = self_arg.is_vulkan() ? self_arg : self_arg.vulkan();
  const vTensor& v_self = convert(self);

  Tensor other = binary_op_preprocess_other_arg(other_arg);

  const vTensor& v_other = convert(other);

  vTensor v_output{
      context,
      utils::broadcast_size(self_arg, other_arg),
      v_self.dtype(),
  };

  const double alpha = alpha_arg ? alpha_arg->to<double>() : 1.0;
  const struct Block final {
    uvec4 output_tensor_size;
    uvec4 input_tensor_size;
    uvec4 other_tensor_size;
    float alpha;
  } block{
      {get_dim<Dim4D::Width>(v_output),
       get_dim<Dim4D::Height>(v_output),
       get_dim<Dim4D::Channel>(v_output),
       get_dim<Dim4D::Batch>(v_output)},

      {get_dim<Dim4D::Width>(v_self),
       get_dim<Dim4D::Height>(v_self),
       get_dim<Dim4D::Channel>(v_self),
       get_dim<Dim4D::Batch>(v_self)},

      {get_dim<Dim4D::Width>(v_other),
       get_dim<Dim4D::Height>(v_other),
       get_dim<Dim4D::Channel>(v_other),
       get_dim<Dim4D::Batch>(v_other)},
      // alpha
      safe_downcast<float>(alpha),
  };

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      shader_descriptor,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_output.extents(),
      // local work group size
      adaptive_work_group_size(v_output.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_output.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      v_self.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      v_other.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return convert(v_output);
}

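// Quantized tensor-tensor binary op: both inputs must be quantized Vulkan
// tensors. The shader receives each input's scale and zero point together
// with the requested output scale and zero point, and writes a QUInt8 output.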
static Tensor quantized_binary_op_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const double scale,
    const int64_t zero_point,
    const api::ShaderInfo& shader_descriptor) {
  utils::is_broadcastable(self_arg, other_arg);
  api::Context* const context = api::context();

  const Tensor self = self_arg.is_vulkan() ? self_arg : self_arg.vulkan();
  const vTensor& v_self = convert(self);
  const Tensor other = other_arg.is_vulkan() ? other_arg : other_arg.vulkan();
  const vTensor& v_other = convert(other);

  TORCH_CHECK(v_self.is_quantized(), "Input tensor is not quantized");
  TORCH_CHECK(v_other.is_quantized(), "Input tensor is not quantized");

  vTensor v_output{
      context,
      utils::broadcast_size(self_arg, other_arg),
      scale,
      zero_point,
      api::kQUInt8,
  };

  const double scale1 = v_self.get_scale();
  const double scale2 = v_other.get_scale();
  const int64_t zero_point1 = v_self.get_zero_point();
  const int64_t zero_point2 = v_other.get_zero_point();
  const struct Block final {
    uvec3 extents;
    uint32_t channelSize;
    uvec3 input1Extents;
    uint32_t channelBatchSize1;
    uvec3 input2Extents;
    uint32_t channelBatchSize2;
    float scale1;
    float scale2;
    int32_t zeroPoint1;
    int32_t zeroPoint2;
    float scale;
    float fill1;
    int32_t zeroPoint;
    int32_t fill2;
  } block{
      v_output.extents(),
      get_dim<Dim4D::Channel>(v_output),
      v_self.extents(),
      get_dim<Dim4D::Channel>(self) * get_dim<Dim4D::Batch>(self),
      v_other.extents(),
      get_dim<Dim4D::Channel>(other) * get_dim<Dim4D::Batch>(other),
      safe_downcast<float>(scale1),
      safe_downcast<float>(scale2),
      safe_downcast<int32_t>(zero_point1),
      safe_downcast<int32_t>(zero_point2),
      safe_downcast<float>(scale),
      0.0f,
      safe_downcast<int32_t>(zero_point),
      0u,
  };

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      shader_descriptor,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_output.extents(),
      // local work group size
      adaptive_work_group_size(v_output.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_output.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      v_self.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      v_other.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return convert_quantized(v_output);
}

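// In-place tensor-tensor binary op. `self_arg` must be a Vulkan tensor whose
// dimensions are at least as large as `other_arg`'s in every axis, so the
// result of the broadcast fits back into `self_arg` without reallocation.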
static Tensor& binary_op_tensor_(
    Tensor& self_arg,
    const Tensor& other_arg,
    const std::optional<Scalar>& alpha_arg,
    const api::ShaderInfo& shader_descriptor) {
  TORCH_CHECK(
      get_dim<Dim4D::Batch>(self_arg) >= get_dim<Dim4D::Batch>(other_arg) &&
          get_dim<Dim4D::Channel>(self_arg) >=
              get_dim<Dim4D::Channel>(other_arg) &&
          get_dim<Dim4D::Height>(self_arg) >=
              get_dim<Dim4D::Height>(other_arg) &&
          get_dim<Dim4D::Width>(self_arg) >= get_dim<Dim4D::Width>(other_arg),
      "Dimensions of the second input to a Vulkan in-place binary elementwise "
      "op must be less than or equal to the dimensions of the underlying "
      "tensor.");

  utils::is_broadcastable(self_arg, other_arg);

  TORCH_CHECK(
      self_arg.is_vulkan(),
      "Vulkan: In-place operator is only supported on Vulkan tensors.");

  api::Context* const context = api::context();

  vTensor& v_self = convert(self_arg);

  Tensor other = binary_op_preprocess_other_arg(other_arg);

  const vTensor& v_other = convert(other);

  const double alpha = alpha_arg ? alpha_arg->to<double>() : 1.0;
  const struct Block final {
    uvec4 input_tensor_size;
    uvec4 other_tensor_size;
    float alpha;
  } block{
      {get_dim<Dim4D::Width>(v_self),
       get_dim<Dim4D::Height>(v_self),
       get_dim<Dim4D::Channel>(v_self),
       get_dim<Dim4D::Batch>(v_self)},

      {get_dim<Dim4D::Width>(v_other),
       get_dim<Dim4D::Height>(v_other),
       get_dim<Dim4D::Channel>(v_other),
       get_dim<Dim4D::Batch>(v_other)},
      // alpha
      safe_downcast<float>(alpha),
  };

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      shader_descriptor,
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_self.extents(),
      // local work group size
      adaptive_work_group_size(v_self.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_self.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::READ | api::MemoryAccessType::WRITE),
      v_other.image(pipeline_barrier, api::PipelineStage::COMPUTE),
      // params buffer
      params.buffer());

  return self_arg;
}

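// The wrappers below bind each ATen overload to the generic helpers above,
// selecting the corresponding Vulkan compute shader via VK_KERNEL.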
static Tensor add_scalar(
    const Tensor& self_arg,
    const Scalar& other,
    const Scalar& alpha) {
  return binary_op_scalar(
      self_arg, other, std::optional<Scalar>(alpha), VK_KERNEL(add_scalar));
}

static Tensor& add_scalar_(
    Tensor& self,
    const Scalar& other,
    const Scalar& alpha) {
  return binary_op_scalar_(
      self, other, std::optional<Scalar>(alpha), VK_KERNEL(add_scalar_inplace));
}

Tensor quantized_add(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const double scale,
    const int64_t zero_point) {
  return quantized_binary_op_tensor(
      self_arg, other_arg, scale, zero_point, VK_KERNEL(quantized_add));
}

Tensor quantized_sub(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const double scale,
    const int64_t zero_point) {
  return quantized_binary_op_tensor(
      self_arg, other_arg, scale, zero_point, VK_KERNEL(quantized_sub));
}

Tensor quantized_mul(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const double scale,
    const int64_t zero_point) {
  return quantized_binary_op_tensor(
      self_arg, other_arg, scale, zero_point, VK_KERNEL(quantized_mul));
}

Tensor quantized_div(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const double scale,
    const int64_t zero_point) {
  return quantized_binary_op_tensor(
      self_arg, other_arg, scale, zero_point, VK_KERNEL(quantized_div));
}

static Tensor add_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const Scalar& alpha) {
  return binary_op_tensor(
      self_arg, other_arg, std::optional<Scalar>(alpha), VK_KERNEL(add));
}

static Tensor& add_tensor_(
    Tensor& self,
    const Tensor& other_arg,
    const Scalar& alpha) {
  return binary_op_tensor_(
      self, other_arg, std::optional<Scalar>(alpha), VK_KERNEL(add_inplace));
}

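// Scalar subtraction reuses the add_scalar kernels with a negated alpha:
// self - alpha * other == self + (-alpha) * other.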
static Tensor sub_scalar(
    const Tensor& self_arg,
    const Scalar& other,
    const Scalar& alpha) {
  return binary_op_scalar(
      self_arg,
      other,
      std::optional<Scalar>(-1 * alpha.to<float>()),
      VK_KERNEL(add_scalar));
}

static Tensor& sub_scalar_(
    Tensor& self,
    const Scalar& other,
    const Scalar& alpha) {
  return binary_op_scalar_(
      self,
      other,
      std::optional<Scalar>(-1 * alpha.to<float>()),
      VK_KERNEL(add_scalar_inplace));
}

static Tensor sub_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const Scalar& alpha) {
  return binary_op_tensor(
      self_arg, other_arg, std::optional<Scalar>(alpha), VK_KERNEL(sub));
}

static Tensor& sub_tensor_(
    Tensor& self,
    const Tensor& other_arg,
    const Scalar& alpha) {
  return binary_op_tensor_(
      self, other_arg, std::optional<Scalar>(alpha), VK_KERNEL(sub_inplace));
}

static Tensor mul_scalar(const Tensor& self_arg, const Scalar& other) {
  return binary_op_scalar(
      self_arg, other, std::optional<Scalar>(), VK_KERNEL(mul_scalar));
}

static Tensor& mul_scalar_(Tensor& self, const Scalar& other) {
  return binary_op_scalar_(
      self, other, std::optional<Scalar>(), VK_KERNEL(mul_scalar_inplace));
}

static Tensor mul_tensor(const Tensor& self_arg, const Tensor& other_arg) {
  return binary_op_tensor(
      self_arg, other_arg, std::optional<Scalar>(), VK_KERNEL(mul));
}

static Tensor& mul_tensor_(Tensor& self, const Tensor& other_arg) {
  return binary_op_tensor_(
      self, other_arg, std::optional<Scalar>(), VK_KERNEL(mul_inplace));
}

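// Scalar division is lowered to multiplication by the reciprocal, reusing the
// mul_scalar kernels.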
static Tensor div_scalar(const Tensor& self_arg, const Scalar& other) {
  return binary_op_scalar(
      self_arg,
      1.0 / other.to<float>(),
      std::optional<Scalar>(),
      VK_KERNEL(mul_scalar));
}

static Tensor& div_scalar_(Tensor& self, const Scalar& other) {
  return binary_op_scalar_(
      self,
      1.0 / other.to<float>(),
      std::optional<Scalar>(),
      VK_KERNEL(mul_scalar_inplace));
}

static Tensor div_tensor(const Tensor& self_arg, const Tensor& other_arg) {
  return binary_op_tensor(
      self_arg, other_arg, std::optional<Scalar>(), VK_KERNEL(div));
}

static Tensor& div_tensor_(Tensor& self, const Tensor& other_arg) {
  return binary_op_tensor_(
      self, other_arg, std::optional<Scalar>(), VK_KERNEL(div_inplace));
}

static Tensor pow(const Tensor& self, const Tensor& other) {
  return binary_op_tensor(self, other, std::optional<Scalar>(), VK_KERNEL(pow));
}

static Tensor& pow_(Tensor& self, const Tensor& other) {
  return binary_op_tensor_(
      self, other, std::optional<Scalar>(), VK_KERNEL(pow_inplace));
}

static Tensor pow_tensor_scalar(const Tensor& self, const Scalar& other) {
  return binary_op_scalar(
      self, other, std::optional<Scalar>(), VK_KERNEL(pow_tensor_scalar));
}

static Tensor& pow_tensor_scalar_(Tensor& self, const Scalar& other) {
  return binary_op_scalar_(
      self,
      other,
      std::optional<Scalar>(),
      VK_KERNEL(pow_tensor_scalar_inplace));
}

static Tensor pow_scalar_tensor(const Scalar& self, const Tensor& other) {
  return binary_op_scalar(
      other, self, std::optional<Scalar>(), VK_KERNEL(pow_scalar_tensor));
}

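// floor_divide by a scalar likewise multiplies by the reciprocal, but uses a
// shader that floors the result; division by zero is rejected up front.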
static Tensor floor_divide_scalar(const Tensor& self, const Scalar& other) {
  TORCH_CHECK(
      other.to<float>() != 0.0f, "floor_divide_scalar: can't divide by zero");
  return binary_op_scalar(
      self,
      1.0 / other.to<float>(),
      std::optional<Scalar>(),
      VK_KERNEL(floor_mul_scalar));
}

static Tensor& floor_divide_scalar_(Tensor& self, const Scalar& other) {
  TORCH_CHECK(
      other.to<float>() != 0.0f, "floor_divide_scalar_: can't divide by zero");
  return binary_op_scalar_(
      self,
      1.0 / other.to<float>(),
      std::optional<Scalar>(),
      VK_KERNEL(floor_mul_scalar_inplace));
}

static Tensor floor_divide_tensor(const Tensor& self, const Tensor& other) {
  return binary_op_tensor(
      self, other, std::optional<Scalar>(), VK_KERNEL(floor_divide));
}

static Tensor& floor_divide_tensor_(Tensor& self, const Tensor& other_arg) {
  return binary_op_tensor_(
      self,
      other_arg,
      std::optional<Scalar>(),
      VK_KERNEL(floor_divide_inplace));
}

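// Registers the functions above as the Vulkan backend implementations of the
// corresponding ATen operators.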
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
  m.impl(TORCH_SELECTIVE_NAME("aten::add.Scalar"), TORCH_FN(add_scalar));
  m.impl(TORCH_SELECTIVE_NAME("aten::add_.Scalar"), TORCH_FN(add_scalar_));
  m.impl(TORCH_SELECTIVE_NAME("aten::add.Tensor"), TORCH_FN(add_tensor));
  m.impl(TORCH_SELECTIVE_NAME("aten::add_.Tensor"), TORCH_FN(add_tensor_));
  m.impl(TORCH_SELECTIVE_NAME("aten::sub.Scalar"), TORCH_FN(sub_scalar));
  m.impl(TORCH_SELECTIVE_NAME("aten::sub_.Scalar"), TORCH_FN(sub_scalar_));
  m.impl(TORCH_SELECTIVE_NAME("aten::sub.Tensor"), TORCH_FN(sub_tensor));
  m.impl(TORCH_SELECTIVE_NAME("aten::sub_.Tensor"), TORCH_FN(sub_tensor_));
  m.impl(TORCH_SELECTIVE_NAME("aten::mul.Scalar"), TORCH_FN(mul_scalar));
  m.impl(TORCH_SELECTIVE_NAME("aten::mul_.Scalar"), TORCH_FN(mul_scalar_));
  m.impl(TORCH_SELECTIVE_NAME("aten::mul.Tensor"), TORCH_FN(mul_tensor));
  m.impl(TORCH_SELECTIVE_NAME("aten::mul_.Tensor"), TORCH_FN(mul_tensor_));
  m.impl(TORCH_SELECTIVE_NAME("aten::div.Scalar"), TORCH_FN(div_scalar));
  m.impl(TORCH_SELECTIVE_NAME("aten::div_.Scalar"), TORCH_FN(div_scalar_));
  m.impl(TORCH_SELECTIVE_NAME("aten::div.Tensor"), TORCH_FN(div_tensor));
  m.impl(TORCH_SELECTIVE_NAME("aten::div_.Tensor"), TORCH_FN(div_tensor_));
  m.impl(TORCH_SELECTIVE_NAME("aten::pow.Tensor_Tensor"), TORCH_FN(pow));
  m.impl(TORCH_SELECTIVE_NAME("aten::pow_.Tensor"), TORCH_FN(pow_));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::pow.Tensor_Scalar"),
      TORCH_FN(pow_tensor_scalar));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::pow_.Scalar"), TORCH_FN(pow_tensor_scalar_));
  m.impl(TORCH_SELECTIVE_NAME("aten::pow.Scalar"), TORCH_FN(pow_scalar_tensor));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::floor_divide.Scalar"),
      TORCH_FN(floor_divide_scalar));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::floor_divide_.Scalar"),
      TORCH_FN(floor_divide_scalar_));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::floor_divide"),
      TORCH_FN(floor_divide_tensor));
  m.impl(
      TORCH_SELECTIVE_NAME("aten::floor_divide_.Tensor"),
      TORCH_FN(floor_divide_tensor_));
}

} // namespace ops
} // namespace vulkan
} // namespace native
} // namespace at
#endif /* USE_VULKAN_API */