/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
16 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
17
18 #include <algorithm>
19
20 #include "tensorflow/lite/kernels/internal/common.h"
21
22 namespace tflite {
23 namespace reference_integer_ops {
24
// For the per-channel functions, the quantization spec
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric)
// requires weights to be symmetrically quantized, so their zero point
// (params.weights_offset) is always 0 and is not applied.
// The per-tensor functions, however, still apply params.weights_offset for
// backward compatibility.
31
FullyConnectedPerChannel(const FullyConnectedParams & params,const int32_t * output_multiplier,const int * output_shift,const RuntimeShape & input_shape,const int8_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const int32_t * bias_data,const RuntimeShape & output_shape,int8_t * output_data)32 inline void FullyConnectedPerChannel(
33 const FullyConnectedParams& params, const int32_t* output_multiplier,
34 const int* output_shift, const RuntimeShape& input_shape,
35 const int8_t* input_data, const RuntimeShape& filter_shape,
36 const int8_t* filter_data, const RuntimeShape& bias_shape,
37 const int32_t* bias_data, const RuntimeShape& output_shape,
38 int8_t* output_data) {
39 const int32_t input_offset = params.input_offset;
40 const int32_t output_offset = params.output_offset;
41 const int32_t output_activation_min = params.quantized_activation_min;
42 const int32_t output_activation_max = params.quantized_activation_max;
43 TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
44 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
45
46 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
47 const int filter_dim_count = filter_shape.DimensionsCount();
48 const int batches = output_shape.Dims(0);
49 const int output_depth = output_shape.Dims(1);
50 TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
51 const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
52 for (int b = 0; b < batches; ++b) {
53 for (int out_c = 0; out_c < output_depth; ++out_c) {
54 int32_t acc = 0;
55 for (int d = 0; d < accum_depth; ++d) {
56 int32_t input_val = input_data[b * accum_depth + d];
57 int32_t filter_val = filter_data[out_c * accum_depth + d];
58 acc += filter_val * (input_val + input_offset);
59 }
60 if (bias_data) {
61 acc += bias_data[out_c];
62 }
63 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
64 output_shift[out_c]);
65 acc += output_offset;
66 acc = std::max(acc, output_activation_min);
67 acc = std::min(acc, output_activation_max);
68 output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
69 }
70 }
71 }
72
73 template <typename AccumScalar>
FullyConnectedPerChannel(const FullyConnectedParams & params,const int32_t * output_multiplier,const int * output_shift,const RuntimeShape & input_shape,const int16_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const AccumScalar * bias_data,const RuntimeShape & output_shape,int16_t * output_data)74 inline void FullyConnectedPerChannel(
75 const FullyConnectedParams& params, const int32_t* output_multiplier,
76 const int* output_shift, const RuntimeShape& input_shape,
77 const int16_t* input_data, const RuntimeShape& filter_shape,
78 const int8_t* filter_data, const RuntimeShape& bias_shape,
79 const AccumScalar* bias_data, const RuntimeShape& output_shape,
80 int16_t* output_data) {
81 const int32_t output_activation_min = params.quantized_activation_min;
82 const int32_t output_activation_max = params.quantized_activation_max;
83 TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
84 TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
85
86 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
87 const int filter_dim_count = filter_shape.DimensionsCount();
88 const int output_dim_count = output_shape.DimensionsCount();
89 const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
90 const int output_depth = output_shape.Dims(output_dim_count - 1);
91 TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
92 const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
93 for (int b = 0; b < batches; ++b) {
94 for (int out_c = 0; out_c < output_depth; ++out_c) {
95 AccumScalar acc = 0;
96 for (int d = 0; d < accum_depth; ++d) {
97 int32_t input_val = input_data[b * accum_depth + d];
98 int32_t filter_val = filter_data[out_c * accum_depth + d];
99 acc += filter_val * input_val;
100 }
101 if (bias_data) {
102 acc += bias_data[out_c];
103 }
104 int32_t acc_scaled = MultiplyByQuantizedMultiplier(
105 acc, output_multiplier[out_c], output_shift[out_c]);
106 acc_scaled = std::max(acc_scaled, output_activation_min);
107 acc_scaled = std::min(acc_scaled, output_activation_max);
108 output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
109 }
110 }
111 }
112
FullyConnected(const FullyConnectedParams & params,const RuntimeShape & input_shape,const int8_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const int32_t * bias_data,const RuntimeShape & output_shape,int8_t * output_data)113 inline void FullyConnected(
114 const FullyConnectedParams& params, const RuntimeShape& input_shape,
115 const int8_t* input_data, const RuntimeShape& filter_shape,
116 const int8_t* filter_data, const RuntimeShape& bias_shape,
117 const int32_t* bias_data, const RuntimeShape& output_shape,
118 int8_t* output_data) {
119 const int32_t input_offset = params.input_offset;
120 const int32_t filter_offset = params.weights_offset;
121 const int32_t output_offset = params.output_offset;
122 const int32_t output_multiplier = params.output_multiplier;
123 const int output_shift = params.output_shift;
124 const int32_t output_activation_min = params.quantized_activation_min;
125 const int32_t output_activation_max = params.quantized_activation_max;
126 TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
127 TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
128
129 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
130 const int filter_dim_count = filter_shape.DimensionsCount();
131 const int output_dim_count = output_shape.DimensionsCount();
132 const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
133 const int output_depth = output_shape.Dims(output_dim_count - 1);
134 TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
135 const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
136 for (int b = 0; b < batches; ++b) {
137 for (int out_c = 0; out_c < output_depth; ++out_c) {
138 int32_t acc = 0;
139 for (int d = 0; d < accum_depth; ++d) {
140 int32_t input_val = input_data[b * accum_depth + d];
141 int32_t filter_val = filter_data[out_c * accum_depth + d];
142 acc += (filter_val + filter_offset) * (input_val + input_offset);
143 }
144 if (bias_data) {
145 acc += bias_data[out_c];
146 }
147 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
148 acc += output_offset;
149 acc = std::max(acc, output_activation_min);
150 acc = std::min(acc, output_activation_max);
151 output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
152 }
153 }
154 }
155
156 template <typename AccumScalar>
FullyConnected(const FullyConnectedParams & params,const RuntimeShape & input_shape,const int16_t * input_data,const RuntimeShape & filter_shape,const int8_t * filter_data,const RuntimeShape & bias_shape,const AccumScalar * bias_data,const RuntimeShape & output_shape,int16_t * output_data)157 inline void FullyConnected(
158 const FullyConnectedParams& params, const RuntimeShape& input_shape,
159 const int16_t* input_data, const RuntimeShape& filter_shape,
160 const int8_t* filter_data, const RuntimeShape& bias_shape,
161 const AccumScalar* bias_data, const RuntimeShape& output_shape,
162 int16_t* output_data) {
163 const int32_t filter_offset = params.weights_offset;
164 const int32_t output_multiplier = params.output_multiplier;
165 const int output_shift = params.output_shift;
166 const int32_t output_activation_min = params.quantized_activation_min;
167 const int32_t output_activation_max = params.quantized_activation_max;
168 TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
169 TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
170
171 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
172 const int filter_dim_count = filter_shape.DimensionsCount();
173 const int output_dim_count = output_shape.DimensionsCount();
174 const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
175 const int output_depth = output_shape.Dims(output_dim_count - 1);
176 TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
177 const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
178 for (int b = 0; b < batches; ++b) {
179 for (int out_c = 0; out_c < output_depth; ++out_c) {
180 AccumScalar acc = 0;
181 for (int d = 0; d < accum_depth; ++d) {
182 int32_t input_val = input_data[b * accum_depth + d];
183 int32_t filter_val = filter_data[out_c * accum_depth + d];
184 acc += (filter_val + filter_offset) * input_val;
185 }
186 if (bias_data) {
187 acc += bias_data[out_c];
188 }
189 int32_t acc_scaled =
190 MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
191 acc_scaled = std::max(acc_scaled, output_activation_min);
192 acc_scaled = std::min(acc_scaled, output_activation_max);
193 output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
194 }
195 }
196 }
197
198 } // namespace reference_integer_ops
199 } // namespace tflite
200
201 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
202