1 #pragma once 2 #include <c10/macros/Export.h> 3 #include <c10/core/ScalarType.h> 4 5 namespace at { 6 namespace native { 7 8 // Quantize a float value into a uint value given scale and zero_point 9 template <typename T> 10 TORCH_API T quantize_val(double scale, int64_t zero_point, float value); 11 // TODO combine this with quantize_val once the numerics for ARM are aligned 12 // with it 13 template <typename T> 14 T quantize_val_arm( 15 const float scale, 16 const int32_t zero_point, 17 const float value); 18 template <typename T, int precision = 8> 19 void quantize_vec( 20 double scale, 21 int64_t zero_point, 22 const float* src, 23 T* dst, 24 size_t count = 8); 25 template <typename T> 26 TORCH_API float dequantize_val(double scale, int64_t zero_point, T value); 27 template <typename T> 28 TORCH_API float dequantize_vec( 29 double scale, 30 int64_t zero_point, 31 const T* src, 32 float* dst, 33 size_t count = 8); 34 template <typename SRC_T, typename DST_T> 35 TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src); 36 37 // Given a multiplier and a zero_point, requantize int32_t computed values back 38 // to quantized values. See comment above 39 // make_per_tensor_affine_quantizer function for the usage of int64_t 40 template <typename DST_T> 41 TORCH_API DST_T 42 requantize_from_int(double multiplier, int64_t zero_point, int64_t src); 43 44 int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax); 45 46 } // namespace native 47 } // namespace at 48