/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_

#include <algorithm>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"

namespace tflite {
namespace reference_ops {

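// Cumulative sum of `input_data` along dimension `axis`, written to
// `output_data` (same shape). With `exclusive` set, each element's own value
// is left out of its output, so the first entry along the axis becomes 0;
// with `reverse` set, the sum is accumulated from the end of the axis
// backwards. For example, for a 2x3 input {1, 2, 3, 4, 5, 6} with axis = 1,
// the inclusive forward result is {1, 3, 6, 4, 9, 15} and the exclusive
// forward result is {0, 1, 3, 0, 4, 9}.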
template <typename T>
inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
                   bool exclusive, bool reverse, T* output_data) {
  const int32_t rank = shape.DimensionsCount();
  TFLITE_DCHECK_GE(rank, 1);
  TFLITE_DCHECK_GE(axis, 0);
  TFLITE_DCHECK_LT(axis, rank);

  size_t inner = 1;
  size_t outer = 1;
  size_t depth = 1;
  for (int32_t i = 0; i < rank; i++) {
    if (i < axis)
      inner *= shape.Dims(i);
    else if (i > axis)
      outer *= shape.Dims(i);
    else
      depth = shape.Dims(i);
  }
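  // In a row-major layout, the element at pre-axis index i, axis index d and
  // post-axis index o lives at flattened offset
  //   i * depth * outer + d * outer + o,
  // where i < inner, d < depth and o < outer. The loops below assemble
  // `index` from exactly these three terms.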

  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
    size_t outer_index_adj;
    if (reverse)
      outer_index_adj = (outer - 1) - outer_index;
    else
      outer_index_adj = outer_index;
    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
      T accumulator = 0;
      size_t inner_index_adj;
      if (reverse)
        inner_index_adj = (inner - 1) - inner_index;
      else
        inner_index_adj = inner_index;
      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
        size_t depth_index_adj;
        if (reverse)
          depth_index_adj = (depth - 1) - depth_index;
        else
          depth_index_adj = depth_index;

        size_t index = outer_index_adj;
        index += inner_index_adj * depth * outer;
        index += depth_index_adj * outer;

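        // Exclusive mode writes the running sum *before* adding the current
        // element, so each output excludes its own input; inclusive mode adds
        // first and then writes.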
        if (exclusive) {
          output_data[index] = accumulator;
          accumulator += input_data[index];
        } else {
          accumulator += input_data[index];
          output_data[index] = accumulator;
        }
      }
    }
  }
}

//
// Quantized INT8 CUMSUM
//
inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
                   const RuntimeShape& shape, int32_t axis, bool exclusive,
                   bool reverse, int8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  // The input offset is the negated input zero point. Activation tensors are
  // asymmetrically quantized, so they span the full int8 range.
  // All inputs should have the same zero point and scale; this is checked
  // during the Prepare stage.
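  // The running sum is kept in int32: each input is brought into
  // zero-point-free form (value + input1_offset), scaled by (1 << left_shift)
  // and by the input multiplier/shift, accumulated, and then requantized to
  // the output scale and offset for every element that is written.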
  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());

  const int32_t rank = shape.DimensionsCount();
  TFLITE_DCHECK_GE(rank, 1);
  TFLITE_DCHECK_GE(axis, 0);
  TFLITE_DCHECK_LT(axis, rank);

  size_t inner = 1;
  size_t outer = 1;
  size_t depth = 1;
  for (int32_t i = 0; i < rank; i++) {
    if (i < axis)
      inner *= shape.Dims(i);
    else if (i > axis)
      outer *= shape.Dims(i);
    else
      depth = shape.Dims(i);
  }

  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
    size_t outer_index_adj;
    if (reverse)
      outer_index_adj = (outer - 1) - outer_index;
    else
      outer_index_adj = outer_index;
    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
      int32_t accumulator = params.input1_offset;  // accumulator = 0
      accumulator *= (1 << params.left_shift);
      accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          accumulator, params.input1_multiplier, params.input1_shift);

      size_t inner_index_adj;
      if (reverse)
        inner_index_adj = (inner - 1) - inner_index;
      else
        inner_index_adj = inner_index;

      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
        size_t depth_index_adj;
        if (reverse)
          depth_index_adj = (depth - 1) - depth_index;
        else
          depth_index_adj = depth_index;

        size_t index = outer_index_adj;
        index += inner_index_adj * depth * outer;
        index += depth_index_adj * outer;

        const int32_t y = params.input1_offset + input_data[index];
        const int32_t shifted_y = y * (1 << params.left_shift);
        const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_y, params.input1_multiplier, params.input1_shift);

        int32_t scaled_output;
        if (exclusive) {
          scaled_output = accumulator;
          accumulator += scaled_y;
        } else {
          accumulator += scaled_y;
          scaled_output = accumulator;
        }

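        // Rescale the running sum to the output scale, re-apply the output
        // zero point, and clamp to the quantized activation range before
        // storing.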
        const int32_t raw_output =
            MultiplyByQuantizedMultiplierSmallerThanOneExp(
                scaled_output, params.output_multiplier, params.output_shift) +
            params.output_offset;
        const int32_t clamped_output =
            std::min(params.quantized_activation_max,
                     std::max(params.quantized_activation_min, raw_output));
        output_data[index] = static_cast<int8_t>(clamped_output);
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_