/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_

#include <algorithm>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

// For the per-channel functions, the quantization spec defines weights as
// symmetric
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
// so zero_point (params.weights_offset) is always 0.
// However, for the per-tensor functions, params.weights_offset is still
// applied for backward compatibility.
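//
// As a sketch of the computation all four kernels below share (notation
// follows the fields used in the code, with [b][d] indexing flattened
// row-major buffers): for every batch b and output channel out_c,
//
//   acc    = sum_d (input[b][d] + input_offset) *
//                  (filter[out_c][d] + weights_offset)
//   output = clamp(rescale(acc + bias[out_c]) + output_offset,
//                  quantized_activation_min, quantized_activation_max)
//
// where rescale() is the fixed-point scaling performed by
// MultiplyByQuantizedMultiplier with the (per-channel or per-tensor)
// output_multiplier/output_shift, and each offset is 0 when the
// corresponding tensor is symmetrically quantized.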

inline void FullyConnectedPerChannel(
    const FullyConnectedParams& params, const int32_t* output_multiplier,
    const int* output_shift, const RuntimeShape& input_shape,
    const int8_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int32_t* bias_data, const RuntimeShape& output_shape,
    int8_t* output_data) {
  const int32_t input_offset = params.input_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = output_shape.Dims(0);
  const int output_depth = output_shape.Dims(1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int32_t acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += filter_val * (input_val + input_offset);
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
                                          output_shift[out_c]);
      acc += output_offset;
      acc = std::max(acc, output_activation_min);
      acc = std::min(acc, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
    }
  }
}
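
// A minimal usage sketch for the int8 per-channel kernel above. All shapes
// and values are hypothetical, chosen only for illustration; they are not
// prescribed by this header:
//
//   // 1 batch x 4 inputs against a 3x4 row-major filter -> 1x3 output.
//   const RuntimeShape input_shape({1, 4});
//   const RuntimeShape filter_shape({3, 4});
//   const RuntimeShape bias_shape({3});
//   const RuntimeShape output_shape({1, 3});
//   FullyConnectedParams params = {};  // assume zero offsets for brevity
//   params.quantized_activation_min = -128;
//   params.quantized_activation_max = 127;
//   // One (multiplier, shift) pair per output channel, typically produced
//   // during kernel preparation (e.g. via QuantizeMultiplier()).
//   int32_t output_multiplier[3];
//   int output_shift[3];
//   // ... fill input_data, filter_data, bias_data, multipliers ...
//   FullyConnectedPerChannel(params, output_multiplier, output_shift,
//                            input_shape, input_data, filter_shape,
//                            filter_data, bias_shape, bias_data, output_shape,
//                            output_data);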

template <typename AccumScalar>
inline void FullyConnectedPerChannel(
    const FullyConnectedParams& params, const int32_t* output_multiplier,
    const int* output_shift, const RuntimeShape& input_shape,
    const int16_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const AccumScalar* bias_data, const RuntimeShape& output_shape,
    int16_t* output_data) {
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int output_dim_count = output_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = output_shape.Dims(output_dim_count - 1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      AccumScalar acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += filter_val * input_val;
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      int32_t acc_scaled = MultiplyByQuantizedMultiplier(
          acc, output_multiplier[out_c], output_shift[out_c]);
      acc_scaled = std::max(acc_scaled, output_activation_min);
      acc_scaled = std::min(acc_scaled, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
    }
  }
}
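
// Note on the 16-bit variants: in TFLite's 16x8 quantization scheme, int16
// activations are symmetric (zero_point == 0), which is why no input or
// output offsets are applied above. The AccumScalar template parameter is
// deduced from the bias type; a wider accumulator (e.g. int64_t) leaves more
// headroom against overflow when accum_depth is large.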

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const int8_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int32_t* bias_data, const RuntimeShape& output_shape,
    int8_t* output_data) {
  const int32_t input_offset = params.input_offset;
  const int32_t filter_offset = params.weights_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int output_dim_count = output_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = output_shape.Dims(output_dim_count - 1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int32_t acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * (input_val + input_offset);
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc += output_offset;
      acc = std::max(acc, output_activation_min);
      acc = std::min(acc, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
    }
  }
}
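
// Unlike the per-channel kernels, the per-tensor kernel above reads a single
// (output_multiplier, output_shift) pair from params. As a sketch of standard
// TFLite practice (computed during kernel preparation, not in this file),
// that pair is the fixed-point representation of
//   real_multiplier = input_scale * filter_scale / output_scale.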

template <typename AccumScalar>
inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const int16_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const AccumScalar* bias_data, const RuntimeShape& output_shape,
    int16_t* output_data) {
  const int32_t filter_offset = params.weights_offset;
  const int32_t output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int output_dim_count = output_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = output_shape.Dims(output_dim_count - 1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      AccumScalar acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * input_val;
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      int32_t acc_scaled =
          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc_scaled = std::max(acc_scaled, output_activation_min);
      acc_scaled = std::min(acc_scaled, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
    }
  }
}
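
// Hypothetical call sketch for the 16-bit kernels (buffers below are
// assumptions for illustration): with int16_t input/output data, AccumScalar
// is deduced from the bias pointer, so passing int64_t bias data selects a
// 64-bit accumulator without an explicit template argument:
//
//   const int64_t* bias_data = /* hypothetical 64-bit bias buffer */;
//   FullyConnected(params, input_shape, input_data, filter_shape, filter_data,
//                  bias_shape, bias_data, output_shape, output_data);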

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_