#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Context.h>
#include <ATen/native/quantized/cpu/QuantizedOps.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_empty_affine_quantized.h>
#include <ATen/ops/hardsigmoid_native.h>
#endif

#include <algorithm>
#include <limits> // for std::numeric_limits used below

namespace at {
namespace native {

DEFINE_DISPATCH(qhardsigmoid_stub);

namespace {

#ifdef USE_PYTORCH_QNNPACK
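// Applies hardsigmoid (clamp(x / 6 + 0.5, 0, 1)) to a quint8 tensor via the
// QNNPACK backend; the result is requantized with a fixed scale of 1/256 and
// a zero point of 0.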
Tensor qnnpack_hardsigmoid(Tensor input) {
  TORCH_CHECK(input.ndimension() > 0, "qnnpack_hardsigmoid(): Got empty input tensor");
  TORCH_CHECK(input.scalar_type() == c10::kQUInt8,
              "qnnpack_hardsigmoid(): Expected input data type ",
              toString(c10::kQUInt8),
              " but got ",
              toString(input.scalar_type()));
  initQNNPACK();

  Tensor input_contig = input.contiguous(input.suggest_memory_format());
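  // QNNPACK uses an NC layout: dim 0 is the batch and every remaining element
  // of a batch entry is treated as a channel.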
  size_t num_elems = input_contig.numel() / input_contig.size(0);
  const auto i_zero_point = input_contig.q_zero_point();
  const auto i_scale = input_contig.q_scale();
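  // Hardsigmoid outputs lie in [0, 1], so the result is quantized with a fixed
  // scale of 1/256 and a zero point of 0, covering [0, 255/256] in quint8.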
  constexpr float o_scale = 1.0f / 256.0f;
  constexpr int32_t o_zero_point = 0;

  pytorch_qnnp_operator_t hardsigmoid_op{nullptr};
  const pytorch_qnnp_status createStatus = pytorch_qnnp_create_hardsigmoid_nc_q8(
      num_elems, // channels
      i_zero_point,
      i_scale,
      o_zero_point,
      o_scale,
      std::numeric_limits<uint8_t>::min(), // output min
      std::numeric_limits<uint8_t>::max(), // output max
      0, // flags
      &hardsigmoid_op);

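  // Hand ownership of the raw operator to a smart pointer so it is released on
  // every exit path, including the internal asserts below.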
  std::unique_ptr<pytorch_qnnp_operator, QnnpackOperatorDeleter>
      qnnpack_uniq_ptr(hardsigmoid_op);

  TORCH_INTERNAL_ASSERT(createStatus == pytorch_qnnp_status_success,
                        "failed to create QNNPACK Hardsigmoid operator");
  Tensor qy = at::_empty_affine_quantized(
      input_contig.sizes(),
      at::device(kCPU).dtype(input_contig.dtype()),
      o_scale,
      o_zero_point,
      input_contig.suggest_memory_format());

  const pytorch_qnnp_status setupStatus = pytorch_qnnp_setup_hardsigmoid_nc_q8(
      hardsigmoid_op,
      input_contig.size(0), // batch size
      (uint8_t*)input_contig.data_ptr<c10::quint8>(), // input data
      num_elems, // input stride
      (uint8_t*)qy.data_ptr<c10::quint8>(), // output data
      num_elems); // output stride
  TORCH_INTERNAL_ASSERT(setupStatus == pytorch_qnnp_status_success,
                        "failed to setup QNNPACK Hardsigmoid operator");

  pthreadpool_t threadpool = caffe2::pthreadpool_();

  const pytorch_qnnp_status runStatus =
      pytorch_qnnp_run_operator(hardsigmoid_op, threadpool);

  TORCH_INTERNAL_ASSERT(
      runStatus == pytorch_qnnp_status_success,
      "failed to run QNNPACK Hardsigmoid operator");
  return qy;
}
#endif // USE_PYTORCH_QNNPACK

} // namespace

Tensor hardsigmoid_quantized_cpu(const Tensor& qx) {
#ifdef USE_PYTORCH_QNNPACK
  if (at::globalContext().qEngine() == at::QEngine::QNNPACK &&
      qx.scalar_type() == kQUInt8) {
    return qnnpack_hardsigmoid(qx);
  }
#endif // USE_PYTORCH_QNNPACK
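  // Fallback: dispatch to the native quantized CPU kernel, which allocates and
  // fills the output tensor qy.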
  Tensor qy;
  qhardsigmoid_stub(qx.device().type(), qx, qy);
  return qy;
}

Tensor& hardsigmoid_out_quantized_cpu(const Tensor& qx, Tensor& result) {
  // Note: we create a new temporary tensor because the output of hardsigmoid
  // usually has different quantization parameters from the input, and
  // quantization parameters are currently only supported per entire tensor or
  // per entire channel of a tensor.
  Tensor qy = hardsigmoid_quantized_cpu(qx);
  result.copy_(qy);
  return result;
}

}} // namespace at::native