#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Context.h>
#include <ATen/Dispatch.h>
#include <ATen/ExpandUtils.h>
#include <torch/library.h>
#include <ATen/quantized/Quantizer.h>
#include <ATen/native/quantized/cpu/BinaryOps.h>
#include <ATen/native/quantized/cpu/QuantizedOps.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <ATen/native/quantized/cpu/XnnpackUtils.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_empty_affine_quantized.h>
#include <ATen/ops/_empty_affine_quantized_native.h>
#include <ATen/ops/empty_like.h>
#include <ATen/ops/relu_native.h>
#endif

#include <algorithm>
#include <utility>

namespace at {
namespace native {

DEFINE_DISPATCH(qadd_relu_stub);
DEFINE_DISPATCH(qadd_stub);
DEFINE_DISPATCH(qadd_scalar_relu_stub);
DEFINE_DISPATCH(qadd_scalar_stub);

namespace {

inline void check_inputs(const Tensor& qa, const Tensor& qb) {
  TORCH_CHECK(
      qa.qscheme() == kPerTensorAffine,
      "Only per tensor quantization is supported in Add.");
  TORCH_CHECK(
      qa.qscheme() == qb.qscheme(),
      "Both inputs to Add must have the same quantization scheme.");
  TORCH_CHECK(
      qa.scalar_type() == qb.scalar_type(),
      "Add operands should have same data type.");
}

// Note: out is assumed to be the same size as self and other.
// Note: Addition is only supported when self, other, and out have the same dtype.
template <bool ReLUFused = false>
Tensor _add_out(Tensor& out, const Tensor& self, const Tensor& other) {
  if (ReLUFused) {
    qadd_relu_stub(self.device().type(), out, self, other);
  } else {
    qadd_stub(self.device().type(), out, self, other);
  }
  return out;
}

template <bool ReLUFused = false>
Tensor _add_scalar_out(Tensor& out, const Tensor& self, const Scalar& other) {
  TORCH_CHECK(
      self.qscheme() == kPerTensorAffine,
      "Only per tensor affine is supported for now!!");
  // To implement tensor-scalar addition in quantized space, we simply
  // adjust the quantization parameters based on the following rules:
  //
  // Let s = scale, z = zero point, c = other.toDouble(), c_q = round(c/s)
  //     q_min = lowest representable value of scalar type
  //     q_max = highest representable value of scalar type
  //
  // Let s' = the calculated scale of the output
  //     z' = the calculated zero-point for the output
  //
  // If q_min > z - c_q
  //   s' = [q_max - (z - c_q)]/[q_max - q_min] * s
  //   z' = q_min
  //   Xq' = at::requantize_from_int(Xq - z + c_q, s/s', z')
  // If q_max < z - c_q
  //   s' = [(z - c_q) - q_min]/[q_max - q_min] * s
  //   z' = q_max
  //   Xq' = at::requantize_from_int(Xq - z + c_q, s/s', z')
  // Else
  //   s' = s
  //   z' = z - c_q

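  // Worked example (illustrative numbers, assuming quint8 so q_min = 0 and
  // q_max = 255): with s = 0.1, z = 10, and other = 5.0 we get
  // c_q = round(5.0 / 0.1) = 50 and z - c_q = -40 < q_min, so the first rule
  // applies: s' = (255 - (-40)) / (255 - 0) * 0.1 ~= 0.116 and z' = 0.
  // With other = -2.0 instead, z - c_q = 30 lies inside [q_min, q_max], so
  // the final rule keeps s' = s = 0.1 and z' = 30.
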
  AT_DISPATCH_QINT_TYPES(self.scalar_type(), "qadd_scalar", [&]() {
    double s = self.q_scale();
    int64_t z = self.q_zero_point();
    double c = other.toDouble();
    // NOLINTNEXTLINE(bugprone-signed-char-misuse)
    int64_t q_min = std::numeric_limits<underlying_t>::min();
    int64_t q_max = std::numeric_limits<underlying_t>::max();

    int64_t c_q = std::nearbyint(c / s);

    double s_prime;
    int64_t z_prime;

    if (q_min > z - c_q) {
      s_prime = ((double)q_max - (z - c_q)) / ((double)q_max - q_min) * s;
      z_prime = q_min;
      set_quantizer_(out, make_per_tensor_affine_quantizer(
          s_prime, z_prime, self.scalar_type()));
      if (ReLUFused) {
        qadd_scalar_relu_stub(self.device().type(), out, self, c_q);
      } else {
        qadd_scalar_stub(self.device().type(), out, self, c_q);
      }
    } else if (q_max < z - c_q) {
      s_prime = ((double)(z - c_q) - q_min) / ((double)q_max - q_min) * s;
      z_prime = q_max;
      set_quantizer_(out, make_per_tensor_affine_quantizer(
          s_prime, z_prime, self.scalar_type()));
      if (ReLUFused) {
        qadd_scalar_relu_stub(self.device().type(), out, self, c_q);
      } else {
        qadd_scalar_stub(self.device().type(), out, self, c_q);
      }
    } else {
      s_prime = s;
      z_prime = z - c_q;
      out.copy_(self);
      set_quantizer_(out, make_per_tensor_affine_quantizer(
          s_prime, z_prime, self.scalar_type()));
      if (ReLUFused) {
        at::native::relu_quantized_cpu_(out);
      }
    }
  });
  return out;
}


#ifdef USE_PYTORCH_QNNPACK
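// QNNPACK path for quint8 addition: makes both inputs contiguous in qa's
// memory format, allocates the quantized output with the requested
// scale/zero_point, then creates, sets up, and runs a QNNPACK q8 add
// operator over the flattened (batch, num_elems) view. When ReLUFused is
// true, the ReLU is folded into the operator's output_min/output_max clamp.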
template <bool ReLUFused = false>
Tensor qnnpack_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
  TORCH_CHECK(qa.ndimension() > 0, "qnnpack_add(): Got empty input tensor.");
  TORCH_CHECK(qa.scalar_type() == c10::kQUInt8 && qb.scalar_type() == c10::kQUInt8,
              "qnnpack_add(): Expected both input data types to be ",
              toString(c10::kQUInt8),
              " but got ",
              toString(qa.scalar_type()),
              " and ",
              toString(qb.scalar_type()));
  Tensor qa_contig = qa.contiguous(qa.suggest_memory_format());
  // qb is made contiguous in qa's memory format so that the underlying
  // kernel can flatten all the dims and iterate over both tensors.
  // In most cases qa and qb are already in the same memory format;
  // when they are not, there is a copy overhead to make qb contiguous
  // in qa's memory format.
  Tensor qb_contig = qb.contiguous(qa.suggest_memory_format());

  const auto a_zero_point = qa_contig.q_zero_point();
  const auto b_zero_point = qb_contig.q_zero_point();
  const auto a_scale = qa_contig.q_scale();
  const auto b_scale = qb_contig.q_scale();

  Tensor qy = at::native::empty_affine_quantized(
      qa_contig.sizes(),
      kQUInt8,
      std::nullopt /* layout */,
      kCPU,
      std::nullopt /* pin_memory */,
      scale,
      zero_point,
      qa.suggest_memory_format());

  if (qa_contig.size(0) == 0) {
    return qy;
  }

  initQNNPACK();

  pytorch_qnnp_operator_t qnnpack_operator{nullptr};

  size_t num_elems = qa_contig.numel() / qa_contig.size(0);
  auto output_min = ReLUFused
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      ? activationLimits<uint8_t>(scale, zero_point, Activation::RELU).first
      : std::numeric_limits<uint8_t>::min();
  auto output_max = ReLUFused
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      ? activationLimits<uint8_t>(scale, zero_point, Activation::RELU).second
      : std::numeric_limits<uint8_t>::max();
  const pytorch_qnnp_status createStatus = pytorch_qnnp_create_add_nc_q8(
      num_elems /* input size */,
      a_zero_point /* a zero_point */,
      a_scale /* a scale */,
      b_zero_point /* b zero_point */,
      b_scale /* b scale */,
      static_cast<uint8_t>(zero_point) /* sum zero_point */,
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      scale /* sum scale */,
      output_min /* output min */,
      output_max /* output max */,
      0 /* flags */,
      &qnnpack_operator);

  TORCH_INTERNAL_ASSERT(
      createStatus == pytorch_qnnp_status_success,
      "failed to create QNNPACK Add operator");

  std::unique_ptr<pytorch_qnnp_operator, QnnpackOperatorDeleter>
      qnnpack_uniq_ptr(qnnpack_operator);

  const pytorch_qnnp_status setupStatus = pytorch_qnnp_setup_add_nc_q8(
      qnnpack_operator /* add op */,
      qa_contig.size(0) /* batch size */,
      (uint8_t*)qa_contig.data_ptr<c10::quint8>() /* a data */,
      num_elems /* A stride */,
      (uint8_t*)qb_contig.data_ptr<c10::quint8>() /* b data */,
      num_elems /* B stride */,
      (uint8_t*)qy.data_ptr<c10::quint8>() /* output data */,
      num_elems /* sum stride */);
  TORCH_INTERNAL_ASSERT(
      setupStatus == pytorch_qnnp_status_success,
      "failed to setup QNNPACK Add operator");

  pthreadpool_t threadpool = caffe2::pthreadpool_();
  const pytorch_qnnp_status runStatus =
      pytorch_qnnp_run_operator(qnnpack_operator, threadpool);

  TORCH_INTERNAL_ASSERT(
      runStatus == pytorch_qnnp_status_success,
      "failed to run QNNPACK Add operator");

  return qy;
}
#endif // USE_PYTORCH_QNNPACK

#ifdef USE_XNNPACK
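// Thin inline wrappers around the XNNPACK QS8 element-wise add API
// (create/reshape/setup), kept separate so the templated xnnp_add below
// stays readable.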
C10_ALWAYS_INLINE
enum xnn_status xnnp_create_add_nd(
    int8_t azp,
    float ascale,
    int8_t bzp,
    float bscale,
    int8_t czp,
    float cscale,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* op) {
  return xnn_create_add_nd_qs8(
      azp, /* int8_t input1_zero_point */
      ascale, /* float input1_scale */
      bzp, /* int8_t input2_zero_point */
      bscale, /* float input2_scale */
      czp, /* int8_t output_zero_point */
      cscale, /* float output_scale */
      output_min, /* int8_t output_min */
      output_max, /* int8_t output_max */
      flags, /* uint32_t flags */
      op); /* xnn_operator_t* add_op_out */
}

C10_ALWAYS_INLINE
enum xnn_status xnnp_reshape_add_nd(
    xnn_operator_t op,
    const std::vector<size_t>& a_shape,
    const std::vector<size_t>& b_shape,
    pthreadpool_t pt_pool) {
  return xnn_reshape_add_nd_qs8(
      op, /* xnn_operator_t add_op */
      a_shape.size(), /* size_t num_input1_dims */
      a_shape.data(), /* const size_t* input1_shape */
      b_shape.size(), /* size_t num_input2_dims */
      b_shape.data(), /* const size_t* input2_shape */
      pt_pool); /* pthreadpool_t threadpool */
}

C10_ALWAYS_INLINE
enum xnn_status xnnp_setup_add_nd(
    xnn_operator_t op,
    const int8_t* da,
    const int8_t* db,
    int8_t* dc,
    pthreadpool_t pt_pool) {
  return xnn_setup_add_nd_qs8(
      op, /* xnn_operator_t add_op */
      da, /* const int8_t* input1 */
      db, /* const int8_t* input2 */
      dc); /* int8_t* output */
}

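// XNNPACK path for qint8 addition: makes qb contiguous in qa's memory format,
// allocates the (possibly broadcast) quantized output, then creates, reshapes,
// sets up, and runs an XNNPACK QS8 add operator. When ReLUFused is true, the
// ReLU is folded into the operator's output_min clamp.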
template <typename scalar_t, bool ReLUFused = false>
Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
  using underlying_t = typename scalar_t::underlying;
  const std::string func_name = "xnnp_add()";
  TORCH_CHECK(qa.ndimension() > 0, func_name, ": Got empty input tensor.");
  TORCH_CHECK(at::native::xnnpack::available(), func_name, ": XNNPACK is not available");

  // Use qa's memory format for qb so that the xnnpack kernel can flatten all
  // the dims.
  auto qa_mem_format = qa.suggest_memory_format();
  Tensor qa_contig = qa.contiguous(qa_mem_format);
  Tensor qb_contig = qb.contiguous(qa_mem_format);

  const auto a_zero_point = qa_contig.q_zero_point();
  const auto b_zero_point = qb_contig.q_zero_point();
  const auto a_scale = qa_contig.q_scale();
  const auto b_scale = qb_contig.q_scale();

  Tensor qy = at::native::empty_affine_quantized(
      at::infer_size_dimvector(qa_contig.sizes(), qb_contig.sizes()),
      qa.scalar_type(),
      std::nullopt /* layout */,
      kCPU,
      std::nullopt /* pin_memory */,
      scale,
      zero_point,
      qa_mem_format);

  if (qa_contig.size(0) == 0) {
    return qy;
  }

  xnn_operator_t xnnp_op = nullptr;
  xnnpack_operator xnnp_add_operator;

  auto output_max = std::numeric_limits<underlying_t>::max();
  auto output_min = std::numeric_limits<underlying_t>::min();
  if (ReLUFused) {
    /*
     * FIXME: use activationLimits<T>()
     * With <T>, MSVC runs into "error C3862: identifier activationLimits not found".
     */
    constexpr int64_t qmin = std::numeric_limits<underlying_t>::min();
    constexpr int64_t qmax = std::numeric_limits<underlying_t>::max();
    int64_t qvalue = static_cast<int64_t>(zero_point);
    qvalue = std::max<int64_t>(qvalue, qmin);
    output_min = static_cast<underlying_t>(std::min<int64_t>(qvalue, qmax));
  }

  // Create the operator
  auto status = xnnp_create_add_nd(
      a_zero_point,
      a_scale,
      b_zero_point,
      b_scale,
      static_cast<underlying_t>(zero_point),
      static_cast<float>(scale),
      output_min,
      output_max,
      0,
      &xnnp_op);
  xnnp_add_operator = xnnpack_operator(xnnp_op);
  TORCH_CHECK(
      status == xnn_status_success,
      func_name, ": xnn create operator failed(", status, ")!");

  const auto qa_shape = xnnp_utils::get_mem_format_aware_shape(qa_contig);
  const auto qb_shape = xnnp_utils::get_mem_format_aware_shape(qb_contig);

  // Reshape the operator
  status = xnnp_reshape_add_nd(
      xnnp_add_operator.get(),
      qa_shape,
      qb_shape,
      caffe2::pthreadpool_());

  TORCH_CHECK(
      status == xnn_status_success,
      func_name, ": xnn reshape operator failed(", status, ")!");

  // Setup the operator
  status = xnnp_setup_add_nd(
      xnnp_add_operator.get(),
      reinterpret_cast<const underlying_t*>(qa_contig.data_ptr<scalar_t>()),
      reinterpret_cast<const underlying_t*>(qb_contig.data_ptr<scalar_t>()),
      reinterpret_cast<underlying_t*>(qy.data_ptr<scalar_t>()),
      caffe2::pthreadpool_());
  TORCH_CHECK(
      status == xnn_status_success,
      func_name, ": xnn setup operator failed(", status, ")!");

  // Run the operator
  status = xnn_run_operator(
      xnnp_add_operator.get(), /* xnn_operator_t op */
      caffe2::pthreadpool_()); /* pthreadpool_t threadpool */
  TORCH_CHECK(
      status == xnn_status_success,
      func_name, ": xnn run operator failed(", status, ")");
  return qy;
}
#endif // USE_XNNPACK

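// Dispatch helper for quantized::add. With the QNNPACK engine selected, qint8
// inputs are routed to the XNNPACK kernel (when built with USE_XNNPACK) and
// quint8 inputs with matching shapes to the QNNPACK kernel (when built with
// USE_PYTORCH_QNNPACK); everything else falls back to the native qadd stubs
// via _add_out.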
template <bool ReLUFused = false>
Tensor qadd(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
  check_inputs(qa, qb);

  if (at::globalContext().qEngine() == at::QEngine::QNNPACK) {
    TORCH_CHECK(
        qa.scalar_type() == qb.scalar_type(),
        "Both inputs to qadd must have the same type");

#ifdef USE_XNNPACK
    if (qa.scalar_type() == kQInt8) {
      return xnnp_add<c10::qint8, ReLUFused>(qa, qb, scale, zero_point);
    }
#endif // USE_XNNPACK

#ifdef USE_PYTORCH_QNNPACK
    if (qa.sizes() == qb.sizes() && /* qnnpack does not support broadcasting */
        qa.scalar_type() == kQUInt8) {
      return qnnpack_add<ReLUFused>(qa, qb, scale, zero_point);
    }
#endif // USE_PYTORCH_QNNPACK
  }
  auto qc = at::_empty_affine_quantized(
      qa.sizes(),
      at::device(kCPU)
          .dtype(qa.scalar_type())
          .memory_format(qa.suggest_memory_format()),
      scale,
      zero_point,
      std::nullopt);
  return _add_out<ReLUFused>(qc, qa, qb);
}

template <bool ReLUFused = false>
Tensor qadd_out(Tensor qa, Tensor qb, Tensor out) {
  check_inputs(qa, qb);
  check_inputs(qa, out);
  return _add_out<ReLUFused>(out, qa, qb);
}


template <bool ReLUFused = false>
Tensor qadd_scalar(Tensor qa, const Scalar& b) {
  TORCH_CHECK(qa.qscheme() == kPerTensorAffine ||
              qa.qscheme() == kPerTensorSymmetric,
              "Only per tensor quantization is supported in Add.");
  auto qc = at::empty_like(qa, qa.suggest_memory_format());
  return _add_scalar_out<ReLUFused>(qc, qa, b);
}

template <bool ReLUFused = false>
Tensor qadd_scalar2(Scalar b, Tensor qa) {
  TORCH_CHECK(qa.qscheme() == kPerTensorAffine ||
              qa.qscheme() == kPerTensorSymmetric,
              "Only per tensor quantization is supported in Add.");
  auto qc = at::empty_like(qa, qa.suggest_memory_format());
  return _add_scalar_out<ReLUFused>(qc, qa, b);
}

template <bool ReLUFused = false>
Tensor qadd_scalar_out(Tensor qa, const Scalar& b, Tensor out) {
  check_inputs(qa, out);
  return _add_scalar_out<ReLUFused>(out, qa, b);
}

// `torch.jit.trace` will trace a Scalar as a Tensor. This overload can be
// removed after broadcast is supported and all variations of
// `quantized::add` are merged into `quantized::add`.
template <bool ReLUFused = false>
Tensor qadd_scalar_tensor(Tensor qa, Tensor b) {
  return qadd_scalar(std::move(qa), b.item());
}

// `torch.jit.trace` will trace a Scalar as a Tensor. This overload can be
// removed after broadcast is supported and all variations of
// `quantized::add` are merged into `quantized::add`.
template <bool ReLUFused = false>
Tensor qadd_scalar_tensor_out(Tensor qa, Tensor b, Tensor out) {
  return qadd_scalar_out(std::move(qa), b.item(), std::move(out));
}

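// Register the QuantizedCPU implementations for the `quantized` namespace,
// covering both the fused (add_relu) and non-fused variants as well as the
// deprecated operator names kept for backward compatibility.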
TORCH_LIBRARY_IMPL(quantized, QuantizedCPU, m) {
  m.impl(TORCH_SELECTIVE_NAME("quantized::add"), TORCH_FN(qadd</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add.out"), TORCH_FN(qadd_out</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add.Scalar"), TORCH_FN(qadd_scalar</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add.Scalar2"), TORCH_FN(qadd_scalar2</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add.Scalar_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu"), TORCH_FN(qadd</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.out"), TORCH_FN(qadd_out</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.Scalar"), TORCH_FN(qadd_scalar</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.Scalar2"), TORCH_FN(qadd_scalar2</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.Scalar_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/true>));
  // deprecated functions, kept for backward compatibility
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_out"), TORCH_FN(qadd_out</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu_out"), TORCH_FN(qadd_out</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar"), TORCH_FN(qadd_scalar</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_relu"), TORCH_FN(qadd_scalar</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_relu_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar.Tensor"), TORCH_FN(qadd_scalar_tensor</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_relu.Tensor"), TORCH_FN(qadd_scalar_tensor</*ReLUFused=*/true>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_out.Tensor"), TORCH_FN(qadd_scalar_tensor_out</*ReLUFused=*/false>));
  m.impl(TORCH_SELECTIVE_NAME("quantized::add_scalar_relu_out.Tensor"), TORCH_FN(qadd_scalar_tensor_out</*ReLUFused=*/true>));
}

TORCH_LIBRARY_IMPL(_quantized, QuantizedCPU, m) {
  m.impl(TORCH_SELECTIVE_NAME("_quantized::add"), TORCH_FN(qadd</*ReLUFused=*/false>));
}

} // namespace

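// Non-fused quantized add entry point exposed outside the anonymous namespace.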
Tensor quantized_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
  return qadd<false>(std::move(qa), std::move(qb), scale, zero_point);
}

}} // namespace at::native