xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/kernels/internal/reference/cumsum.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
16 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
17 
18 #include <algorithm>
19 #include <cstdint>
20 #include <limits>
21 
22 #include "tensorflow/lite/kernels/internal/common.h"
23 #include "tensorflow/lite/kernels/internal/compatibility.h"
24 
25 namespace tflite {
26 namespace reference_ops {
27 
28 template <typename T>
CumSum(const T * input_data,const RuntimeShape & shape,int32_t axis,bool exclusive,bool reverse,T * output_data)29 inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
30                    bool exclusive, bool reverse, T* output_data) {
31   const int32_t rank = shape.DimensionsCount();
32   TFLITE_DCHECK_GE(rank, 1);
33   TFLITE_DCHECK_GE(axis, 0);
34   TFLITE_DCHECK_LT(axis, rank);
35 
36   size_t inner = 1;
37   size_t outer = 1;
38   size_t depth = 1;
39   for (int32_t i = 0; i < rank; i++) {
40     if (i < axis)
41       inner *= shape.Dims(i);
42     else if (i > axis)
43       outer *= shape.Dims(i);
44     else
45       depth = shape.Dims(i);
46   }
47 
48   for (size_t outer_index = 0; outer_index < outer; outer_index++) {
49     size_t outer_index_adj;
50     if (reverse)
51       outer_index_adj = (outer - 1) - outer_index;
52     else
53       outer_index_adj = outer_index;
54     for (size_t inner_index = 0; inner_index < inner; inner_index++) {
55       T accumulator = 0;
56       size_t inner_index_adj;
57       if (reverse)
58         inner_index_adj = (inner - 1) - inner_index;
59       else
60         inner_index_adj = inner_index;
61       for (size_t depth_index = 0; depth_index < depth; depth_index++) {
62         size_t depth_index_adj;
63         if (reverse)
64           depth_index_adj = (depth - 1) - depth_index;
65         else
66           depth_index_adj = depth_index;
67 
68         size_t index = outer_index_adj;
69         index += inner_index_adj * depth * outer;
70         index += depth_index_adj * outer;
71 
72         if (exclusive) {
73           output_data[index] = accumulator;
74           accumulator += input_data[index];
75         } else {
76           accumulator += input_data[index];
77           output_data[index] = accumulator;
78         }
79       }
80     }
81   }
82 }
83 
84 //
85 // Quantized INT8 CUMSUM
86 //
CumSum(const ArithmeticParams & params,const int8_t * input_data,const RuntimeShape & shape,int32_t axis,bool exclusive,bool reverse,int8_t * output_data)87 inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
88                    const RuntimeShape& shape, int32_t axis, bool exclusive,
89                    bool reverse, int8_t* output_data) {
90   TFLITE_DCHECK_LE(params.quantized_activation_min,
91                    params.quantized_activation_max);
92   // Input offset is negative input zero point. Activation tensors are
93   // asymmetric quantized so they span the full int8 range.
94   // All inputs should have same zero-point and scale, this is checked during
95   // Prepare stage.
96   TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
97   TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
98 
99   const int32_t rank = shape.DimensionsCount();
100   TFLITE_DCHECK_GE(rank, 1);
101   TFLITE_DCHECK_GE(axis, 0);
102   TFLITE_DCHECK_LT(axis, rank);
103 
104   size_t inner = 1;
105   size_t outer = 1;
106   size_t depth = 1;
107   for (int32_t i = 0; i < rank; i++) {
108     if (i < axis)
109       inner *= shape.Dims(i);
110     else if (i > axis)
111       outer *= shape.Dims(i);
112     else
113       depth = shape.Dims(i);
114   }
115 
116   for (size_t outer_index = 0; outer_index < outer; outer_index++) {
117     size_t outer_index_adj;
118     if (reverse)
119       outer_index_adj = (outer - 1) - outer_index;
120     else
121       outer_index_adj = outer_index;
122     for (size_t inner_index = 0; inner_index < inner; inner_index++) {
123       int32_t accumulator = params.input1_offset;  // accumulator = 0
124       accumulator *= (1 << params.left_shift);
125       accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
126           accumulator, params.input1_multiplier, params.input1_shift);
127 
128       size_t inner_index_adj;
129       if (reverse)
130         inner_index_adj = (inner - 1) - inner_index;
131       else
132         inner_index_adj = inner_index;
133 
134       for (size_t depth_index = 0; depth_index < depth; depth_index++) {
135         size_t depth_index_adj;
136         if (reverse)
137           depth_index_adj = (depth - 1) - depth_index;
138         else
139           depth_index_adj = depth_index;
140 
141         size_t index = outer_index_adj;
142         index += inner_index_adj * depth * outer;
143         index += depth_index_adj * outer;
144 
145         const int32_t y = params.input1_offset + input_data[index];
146         const int32_t shifted_y = y * (1 << params.left_shift);
147         const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
148             shifted_y, params.input1_multiplier, params.input1_shift);
149 
150         int32_t scaled_output;
151         if (exclusive) {
152           scaled_output = accumulator;
153           accumulator += scaled_y;
154         } else {
155           accumulator += scaled_y;
156           scaled_output = accumulator;
157         }
158 
159         const int32_t raw_output =
160             MultiplyByQuantizedMultiplierSmallerThanOneExp(
161                 scaled_output, params.output_multiplier, params.output_shift) +
162             params.output_offset;
163         const int32_t clamped_output =
164             std::min(params.quantized_activation_max,
165                      std::max(params.quantized_activation_min, raw_output));
166         output_data[index] = static_cast<int8_t>(clamped_output);
167       }
168     }
169   }
170 }
171 
172 }  // namespace reference_ops
173 }  // namespace tflite
174 
175 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
176