/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_UNIFORM_QUANT_OPS_MATH_UTILS_H_
#define TENSORFLOW_CORE_KERNELS_UNIFORM_QUANT_OPS_MATH_UTILS_H_

#include <algorithm>
#include <cmath>
#include <limits>

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/status.h"

namespace tensorflow {

namespace internal {

// Multiply by the effective quantized multiplier and shift.
// Caller is responsible for guaranteeing:
// quantized_multiplier >= 0
// shift >= -31 && shift <= 30
// The usage of this function is restricted to "multiply by a
// quantized_multiplier and shift which were calculated from the
// QuantizeMultiplier() function below", so the conditions are expected to be
// met.
//
// Reference (TFLite MultiplyByQuantizedMultiplier with TFLITE_SINGLE_ROUNDING):
// https://github.com/tensorflow/tensorflow/blob/47c640a961874f644cd071752835c7b792450bb8/tensorflow/lite/kernels/internal/common.h#L145
// The above implementation is derived from the ruy MultiplyByQuantizedMultiplier
// (https://github.com/google/ruy/blob/97ebb72aa0655c0af98896b317476a5d0dacad9c/ruy/apply_multiplier.cc)
//
// After multiplying by the fixed-point quantized_multiplier, apply a single
// rounding operation (add 'round' to the result, then shift right by
// total_shift), where round = (1 << (30 - shift)) and
// total_shift = (31 - shift).
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
                                             int32_t quantized_multiplier,
                                             int shift) {
  const int64_t total_shift = 31 - shift;
  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
  int64_t result = x * static_cast<int64_t>(quantized_multiplier) + round;
  result = result >> total_shift;

  result = std::clamp(
      result, static_cast<int64_t>(std::numeric_limits<int32_t>::min()),
      static_cast<int64_t>(std::numeric_limits<int32_t>::max()));
  return static_cast<int32_t>(result);
}
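
// Worked example (values chosen for illustration, not from the original
// sources): the real multiplier 0.5 is represented as
// quantized_multiplier = (1 << 30) with shift = 0, since
// 0.5 == (1 << 30) * 2^(0 - 31). Then for x = 7:
//   total_shift = 31, round = 1 << 30
//   result = (7 * (1 << 30) + (1 << 30)) >> 31 = (1 << 33) >> 31 = 4
// i.e. 7 * 0.5 = 3.5 rounds away from zero to 4, while x = 6 yields
// (7 << 30) >> 31 = 3 exactly.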

}  // namespace internal

// Quantize the Eigen Tensor input_tensor using the given inv_scale and
// zero_point, following the formula:
// quantized_val = floor(input_val * inv_scale + 0.5f) + zero_point
//
// The caller is responsible for the validity of inv_scale (avoid precision
// loss from taking the inverse, and ensure that inv_scale is a finite number).
template <typename ConstTensorTin, typename TensorTout>
void AffineQuantize(const ConstTensorTin& input_tensor, float inv_scale,
                    int32_t zero_point, int32_t quantization_min_val,
                    int32_t quantization_max_val, TensorTout quantized_tensor) {
  quantized_tensor = ((input_tensor.template cast<float>() * inv_scale + 0.5f)
                          .floor()
                          .template cast<int32_t>() +
                      zero_point)
                         .cwiseMin(quantization_max_val)
                         .cwiseMax(quantization_min_val)
                         .template cast<typename TensorTout::Scalar>();
}
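
// Example (a hypothetical call, not from the original sources): with
// inv_scale = 2.0f (scale 0.5), zero_point = 10, and the qint8 range
// [-128, 127], an input value of 1.3f maps to
// floor(1.3f * 2.0f + 0.5f) + 10 = 3 + 10 = 13, which is already inside
// the clamping range:
//   AffineQuantize(input.tensor<float, 1>(), /*inv_scale=*/2.0f,
//                  /*zero_point=*/10, /*quantization_min_val=*/-128,
//                  /*quantization_max_val=*/127,
//                  output.tensor<qint8, 1>());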

// Dequantize the Eigen Tensor input_tensor using the given scale and
// zero_point, following the formula:
// dequantized_val = (input_val - zero_point) * scale
template <typename ConstTensorTin, typename TensorTout>
void AffineDequantize(const ConstTensorTin& input_tensor, float scale,
                      int32_t zero_point, TensorTout dequantized_tensor) {
  dequantized_tensor = (((input_tensor.template cast<int32_t>() - zero_point))
                            .template cast<float>() *
                        scale)
                           .template cast<typename TensorTout::Scalar>();
}
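
// Example (illustrative numbers, not from the original sources): continuing
// the AffineQuantize() example above, the quantized value 13 with
// scale = 0.5f and zero_point = 10 dequantizes to (13 - 10) * 0.5f = 1.5f;
// the 0.2f round-trip error versus the original 1.3f comes from the
// floor(x + 0.5f) rounding to the nearest quantized step.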

// Given a portion of an input float tensor, quantizes the data and writes the
// output to the corresponding portion in quantized_tensor. The quantization
// scale and zero_point are calculated using the min and max of the input data.
// This function is used for dynamic range quantization in hybrid
// (float x qint) kernels.
//
// This function's behavior aligns with TFLite AsymmetricQuantize() to achieve
// feature parity with TFLite, which is required since supporting mobile
// executions is one of the major use cases. The behavior is the same except
// for the following difference:
// TFLite AsymmetricQuantize() uses
// round(input / scale + zero_point),
// while AffineQuantize() uses
// floor(input_val * (1./scale) + 0.5) + zero_point
void AsymmetricQuantize(const Tensor& tensor, int apply_offset, int apply_size,
                        int32_t quantization_min_val,
                        int32_t quantization_max_val, float& scale,
                        int32_t& zero_point, Tensor& quantized_tensor);
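
// Sketch of the scale/zero_point derivation (illustrative only; the exact
// range-nudging rules live in the implementation): for input data spanning
// [-1.0f, 3.0f] quantized to the qint8 range [-128, 127],
//   scale      = (3.0 - (-1.0)) / (127 - (-128)) = 4.0 / 255 ~= 0.0157
//   zero_point = round(-128 - (-1.0 / scale))    = round(-64.25) = -64
// so that the real value 0.0f maps close to the quantized value -64.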

// Given double_multiplier, quantizes it so that it is represented by two
// int32_t values, quantized_multiplier and shift, where
// double_multiplier ~= quantized_multiplier * 2^(shift - 31).
//
// double_multiplier must be a positive finite number. Otherwise returns
// InvalidArgument.
//
// The output quantized_multiplier is clamped to the range [0, INT32_MAX],
// and shift is clamped to the range [-31, 30].
Status QuantizeMultiplier(double double_multiplier,
                          int32_t& quantized_multiplier, int32_t& shift);
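
// Worked example (illustrative, assuming the usual frexp-style
// decomposition): QuantizeMultiplier(0.5, q, s) yields q = (1 << 30) and
// s = 0, since 0.5 == (1 << 30) * 2^(0 - 31); these are exactly the values
// consumed by internal::MultiplyByQuantizedMultiplier() above.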

// Requantize input_val given the quantized effective_multiplier|shift and
// input|output zero_point.
// The effective multiplier and shift should be calculated from the effective
// scale, which is:
// (product of input scales) / (product of output scales).
template <typename Tin, typename Tout>
Tout AffineRequantizeWithQuantizedMultiplierAndShift(
    Tin input_val, int32_t effective_quantized_multiplier, int effective_shift,
    int32_t input_zero_point, int32_t output_zero_point,
    int32_t quantization_min_val, int32_t quantization_max_val) {
  const int32_t input = static_cast<int32_t>(input_val) - input_zero_point;

  const int32_t unclamped =
      internal::MultiplyByQuantizedMultiplier(
          input, effective_quantized_multiplier, effective_shift) +
      output_zero_point;

  // Clamp with [quantization_min_val, quantization_max_val].
  return static_cast<Tout>(
      std::max<int32_t>(std::min<int32_t>(unclamped, quantization_max_val),
                        quantization_min_val));
}
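
// Example (illustrative numbers, not from the original sources): requantizing
// from scale 0.5 / zero_point 10 to scale 1.0 / zero_point -5 gives an
// effective scale of 0.5 / 1.0 = 0.5, i.e. quantized_multiplier = (1 << 30)
// and shift = 0 (see QuantizeMultiplier() above). Then input_val = 20
// represents the real value (20 - 10) * 0.5 = 5.0, and
//   AffineRequantizeWithQuantizedMultiplierAndShift<qint8, qint8>(
//       20, 1 << 30, 0, 10, -5, -128, 127)
// returns 5.0 / 1.0 + (-5) = 0.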

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_UNIFORM_QUANT_OPS_MATH_UTILS_H_