// xref: /aosp_15_r20/external/pytorch/aten/src/ATen/quantized/Quantizer.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#pragma once

#include <c10/core/MemoryFormat.h>
#include <c10/core/QScheme.h>
#include <c10/core/ScalarType.h>
#include <c10/core/TensorOptions.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/intrusive_ptr.h>

#include <ATen/Tensor.h>
#include <ATen/TensorUtils.h>

#include <ATen/core/QuantizerBase.h>

#include <cmath>
#include <memory>
#include <utility>

20 namespace at {
21 
22 /**
23  * UnknownQuantizer is a placeholder quantizer for functions that implement
24  * quantization in a two step process.  First a tensor is allocated but with
25  * unknown quantizer, and then the quantization kernel decides what the final
26  * quantizer will be.
27  */
28 struct TORCH_API UnknownQuantizer : public Quantizer {
UnknownQuantizerUnknownQuantizer29   explicit UnknownQuantizer(ScalarType scalar_type)
30     : Quantizer(scalar_type) {}
31 
32   Tensor quantize(const Tensor& tensor) override;
33   Tensor dequantize(const Tensor& qtensor) override;
34   Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
35   QScheme qscheme() const override;
36   bool equalTo(QuantizerPtr other) const override;
37 };
38 
39 /**
40  * UniformQuantizer is the parent class for all uniform quantizers.
41  * These quantization scheme will map float value uniformly to
42  * the quantized value. For example, affine quantizer is
43  * the most commonly used scheme in this category.
44  */
45 struct TORCH_API UniformQuantizer : public Quantizer {
UniformQuantizerUniformQuantizer46   explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
47 };
48 
49 /**
50  * NonUniformQuantizer is the parent class for all non-uniform quantizers.
51  * These quantization scheme may map float value non-uniformly to the quantized
52  * value. K-means quantization is a representative example in this category.
53  */
54 struct TORCH_API NonUniformQuantizer : public Quantizer {
NonUniformQuantizerNonUniformQuantizer55   explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
56 };
57 
58 // There is also StochasticQuantizer which is uniform but not affine
59 
60 /**
61  * AffineQuantizer uses affine transformation to do quantization.
62  *
63  * For quantize:
64  * Y = clamp(round(X / scale + zero_point), min, max)
65  * For dequantize:
66  * X = (Y - zero_point) * scale
67  */
68 struct TORCH_API AffineQuantizer : public UniformQuantizer {
AffineQuantizerAffineQuantizer69   explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
70 };
71 
72 // Note that we will not have Symmetric Quantizer in backend to reduce
73 // complications in quantized kernel implementation.
74 
75 /**
76  * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for
77  * all the values in the Tensor.
78  */
79 struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
PerTensorAffineQuantizerPerTensorAffineQuantizer80   explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
81     : AffineQuantizer(scalar_type),
82         scale_(scale),
83         zero_point_(zero_point) {}
84 
85   Tensor quantize(const Tensor& tensor) override;
86   Tensor dequantize(const Tensor& qtensor) override;
87   Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
88 
qschemePerTensorAffineQuantizer89   QScheme qscheme() const override {
90     return kPerTensorAffine;
91   }
92 
scalePerTensorAffineQuantizer93   double scale() const {
94     return scale_;
95   }
96 
zero_pointPerTensorAffineQuantizer97   int64_t zero_point() const {
98     return zero_point_;
99   }
100 
equalToPerTensorAffineQuantizer101   bool equalTo(QuantizerPtr other) const override {
102     if (!other.get() || other->qscheme() != kPerTensorAffine) {
103       return false;
104     }
105     auto* other_per_tensor_affine =
106         static_cast<PerTensorAffineQuantizer*>(other.get());
107     return scalar_type() == other_per_tensor_affine->scalar_type() &&
108         scale() == other_per_tensor_affine->scale() &&
109         zero_point() == other_per_tensor_affine->zero_point();
110   }
111 
112  private:
113   const double scale_;
114   // We use int64_t for consistency with Python
115   const int64_t zero_point_;
116 };
117 
118 /**
119  * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
120  * except that we have an independent scale and zero_point parameter
121  * for each channel.
122  *
123  * Also note that per channel quantization is mostly applied to output channels
124  * of weights since per-input channel of weight quantization or per-channel
125  * quantization for activations can't be efficiently supported in most of
126  * processors since it requires each multiplication result within a single
127  * dot-product to have a different scale.
128  */
129 struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
PerChannelAffineQuantizerPerChannelAffineQuantizer130   explicit PerChannelAffineQuantizer(
131       ScalarType scalar_type,
132       Tensor scales,
133       Tensor zero_points,
134       int64_t axis)
135       : AffineQuantizer(scalar_type),
136         scales_(std::move(scales)),
137         zero_points_(std::move(zero_points)),
138         axis_(axis) {}
139 
qschemePerChannelAffineQuantizer140   QScheme qscheme() const override {
141     return kPerChannelAffine;
142   }
143 
scalesPerChannelAffineQuantizer144   Tensor scales() const {
145     return scales_;
146   }
147 
zero_pointsPerChannelAffineQuantizer148   Tensor zero_points() const {
149     return zero_points_;
150   }
151 
axisPerChannelAffineQuantizer152   int64_t axis() const {
153     return axis_;
154   }
155 
156   Tensor quantize(const Tensor& tensor) override;
157   Tensor dequantize(const Tensor& qtensor) override;
158   Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
159 
equalToPerChannelAffineQuantizer160   bool equalTo(QuantizerPtr other) const override {
161     if (!other.get() || other->qscheme() != kPerChannelAffine) {
162       return false;
163     }
164     auto* other_per_channel_affine =
165         static_cast<PerChannelAffineQuantizer*>(other.get());
166     return scalar_type() == other_per_channel_affine->scalar_type() &&
167         scales().equal(other_per_channel_affine->scales()) &&
168         zero_points().equal(other_per_channel_affine->zero_points()) &&
169         axis() == other_per_channel_affine->axis();
170   }
171 
172  protected:
173   Tensor scales_;
174   Tensor zero_points_;
175   const int64_t axis_;
176 };
177 
178 /**
179  * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
180  * except that it expects both scale and zero point to be floating point values.
181  *
182  * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
183  * kPerChannelAffine.
184  *
185  * The quantize equation in this case looks like -
186  * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
187  *
188  * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to
189  * be exactly represented in the quantized space. We can get additional precision by
190  * using floating point values for zero point.
191  */
192 struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
PerChannelAffineFloatQParamsQuantizerPerChannelAffineFloatQParamsQuantizer193   explicit PerChannelAffineFloatQParamsQuantizer(
194       ScalarType scalar_type,
195       Tensor scales,
196       Tensor zero_points,
197       int64_t axis)
198       : PerChannelAffineQuantizer(scalar_type,
199         scales,
200         zero_points,
201         axis) {}
202 
qschemePerChannelAffineFloatQParamsQuantizer203   QScheme qscheme() const override {
204     return kPerChannelAffineFloatQParams;
205   }
206 
207   Tensor quantize(const Tensor& tensor) override;
208   Tensor dequantize(const Tensor& qtensor) override;
209   Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
210 
equalToPerChannelAffineFloatQParamsQuantizer211   bool equalTo(QuantizerPtr other) const override {
212     if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
213       return false;
214     }
215     auto* other_per_channel_float_qparams =
216         static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
217     return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
218         scales().equal(other_per_channel_float_qparams->scales()) &&
219         zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
220         axis() == other_per_channel_float_qparams->axis();
221   }
222 };
223 
224 // This is an internal utility function for getting at the QTensorImpl,
225 // You should only use this for writing low level
226 // setters/getters for QTensorImpl fields; otherwise, you should use
227 // the low level setters/getters that were implemented using this.
228 // This may be called repeatedly, so make sure it's pretty cheap.
229 TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);
230 
231 // double and int64_t are because of the native function API, we only have these
232 // argument types right now in native functions
233 TORCH_API QuantizerPtr
234 make_per_tensor_affine_quantizer(
235     double scale, int64_t zero_point, ScalarType scalar_type);
236 
237 TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
238     const Tensor& scales,
239     const Tensor& zero_points,
240     int64_t axis,
241     ScalarType scalar_type);
242 
243 TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
244 
245 // Create a Quantized Tensor given arguments for normal Tensor and a quantizer
246 TORCH_API Tensor new_qtensor(
247     IntArrayRef sizes,
248     const TensorOptions& options,
249     QuantizerPtr quantizer);
250 
251 TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);
252 
253 TORCH_API Tensor from_blob_quantized_per_tensor_affine(
254     void* data,
255     IntArrayRef sizes,
256     IntArrayRef strides,
257     std::function<void(void*)> deleter,
258     const float scale,
259     const int64_t zeroPoint,
260     const TensorOptions& options);
261 
262 TORCH_API Tensor from_blob_quantized_per_tensor_affine(
263     void* data,
264     IntArrayRef sizes,
265     std::function<void(void*)> deleter,
266     const float scale,
267     const int64_t zeroPoint,
268     const TensorOptions& options);
269 
270 TORCH_API Tensor from_blob_quantized_per_channel_affine(
271     void* data,
272     IntArrayRef sizes,
273     std::function<void(void*)> deleter,
274     const Tensor& scales,
275     const Tensor& zero_points,
276     const int64_t axis,
277     const TensorOptions& options);
278 
279 } // namespace at
280