xref: /aosp_15_r20/external/gemmlowp/meta/transform_kernels.h (revision 5f39d1b313f0528e11bae88b3029b54b9e1033e7)
// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*5f39d1b3SJooyung Han 
15*5f39d1b3SJooyung Han #ifndef GEMMLOWP_META_TRANSFORM_KERNELS_H_
16*5f39d1b3SJooyung Han #define GEMMLOWP_META_TRANSFORM_KERNELS_H_
17*5f39d1b3SJooyung Han 
18*5f39d1b3SJooyung Han #include "base.h"
19*5f39d1b3SJooyung Han 
20*5f39d1b3SJooyung Han namespace gemmlowp {
21*5f39d1b3SJooyung Han namespace meta {
22*5f39d1b3SJooyung Han 
// Parameter block for the Quantize transform. The type itself is used as the
// tag that selects the Quantize specializations of Transform1DKernel and
// Transform1DUtil in this file.
//
// Plain aggregate: no constructors and no default member initializers, so
// callers must set every field before use.
struct Quantize {
  // Range description of the floating-point data.
  // NOTE(review): the exact formula combining min/offset/scale lives in the
  // architecture-specific kernels, not in this header -- confirm there.
  float range_min;
  float range_offset;
  float range_scale;
  // Multiplied by a constant in Transform1DUtil<..., Quantize>::
  // EstimateComputeCost; presumably the number of elements to transform.
  int count;
};
29*5f39d1b3SJooyung Han 
// Parameter block for the Dequantize transform. The type itself is used as the
// tag that selects the Dequantize specializations of Transform1DKernel and
// Transform1DUtil in this file.
//
// Plain aggregate: no constructors and no default member initializers, so
// callers must set every field before use.
struct Dequantize {
  // Range description of the floating-point data.
  // NOTE(review): the exact formula combining min/offset/scale lives in the
  // architecture-specific kernels, not in this header -- confirm there.
  float range_min;
  float range_offset;
  float range_scale;
  // Multiplied by a constant in Transform1DUtil<..., Dequantize>::
  // EstimateComputeCost; presumably the number of elements to transform.
  int count;
};
36*5f39d1b3SJooyung Han 
// Parameter block for the Requantize transform. The type itself is used as the
// tag that selects the Requantize specializations of Transform1DKernel and
// Transform1DUtil in this file.
//
// Plain aggregate: no constructors and no default member initializers, so
// callers must set every field before use.
struct Requantize {
  // Range description of the input data.
  float input_range_min;
  float input_range_offset;
  float input_range_scale;
  // Range description of the output data. Note that the output scale is
  // stored as a reciprocal (per the field name), unlike the input scale.
  // NOTE(review): exact usage is defined by the architecture-specific
  // kernels -- confirm there.
  float output_range_min;
  float output_range_offset;
  float one_over_output_range_scale;
  // Multiplied by a constant in Transform1DUtil<..., Requantize>::
  // EstimateComputeCost; presumably the number of elements to transform.
  int count;
};
46*5f39d1b3SJooyung Han 
// Parameter block for the MinMax transform, templated on the type of the two
// bounds. MinMax<Type> is used as the tag that selects the MinMax
// specializations of Transform1DKernel and Transform1DUtil in this file.
//
// Plain aggregate: no constructors and no default member initializers, so
// callers must set every field before use.
template <typename Type>
struct MinMax {
  // Lower and upper bound.
  // NOTE(review): presumably values are clamped to [min, max]; the actual
  // operation is implemented by the architecture-specific kernels.
  Type min;
  Type max;
  // Multiplied by a constant in Transform1DUtil<..., MinMax<...>>::
  // EstimateComputeCost; presumably the number of elements to transform.
  int count;
};
53*5f39d1b3SJooyung Han 
// Parameter block for the BiasAdd transform, templated on the element type of
// the bias buffer. BiasAdd<BiasType> is used as the tag that selects the
// BiasAdd specialization of Transform1DKernel in this file.
//
// Plain aggregate: no constructors and no default member initializers, so
// callers must set every field before use.
template <typename BiasType>
struct BiasAdd {
  // Range description of the input data.
  float input_range_min;
  float input_range_offset;
  float input_range_scale;
  // Range description of the bias data.
  float bias_range_min;
  float bias_range_offset;
  float bias_range_scale;
  // Range description of the output data; the scale is stored as a
  // reciprocal (per the field name).
  float output_range_min;
  float output_range_offset;
  float one_over_output_range_scale;
  // NOTE(review): presumably `count` is the number of elements per row and
  // `rows` the number of rows the bias is applied to -- confirm against the
  // architecture-specific kernels.
  int count;
  int rows;
  // Non-owning: this struct declares no destructor, so the pointed-to storage
  // must be kept alive by whoever filled in the struct.
  const BiasType* bias;
};
69*5f39d1b3SJooyung Han 
70*5f39d1b3SJooyung Han template <typename InType, typename OutType, int kernel_size, int leftovers>
71*5f39d1b3SJooyung Han class Transform1DKernel<InType, OutType, Quantize, kernel_size, leftovers> {
72*5f39d1b3SJooyung Han  public:
Transform(const InType * in,const Quantize & params,OutType * output)73*5f39d1b3SJooyung Han   static void Transform(const InType* in, const Quantize& params,
74*5f39d1b3SJooyung Han                         OutType* output) {
75*5f39d1b3SJooyung Han #ifdef DEBUG
76*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
77*5f39d1b3SJooyung Han     std::cout << "Quantize::Transform(" << std::string(typeid(InType).name())
78*5f39d1b3SJooyung Han               << ", " << std::string(typeid(OutType).name()) << ") -- "
79*5f39d1b3SJooyung Han               << kernel_size << "x" << leftovers << std::endl;
80*5f39d1b3SJooyung Han #endif
81*5f39d1b3SJooyung Han #else
82*5f39d1b3SJooyung Han     std::cerr << "FATAL: Quantize::Transform not implemented." << std::endl;
83*5f39d1b3SJooyung Han     std::exit(1);
84*5f39d1b3SJooyung Han #endif
85*5f39d1b3SJooyung Han   }
86*5f39d1b3SJooyung Han };
87*5f39d1b3SJooyung Han 
88*5f39d1b3SJooyung Han template <typename InType, typename OutType, int kernel_size, int leftovers>
89*5f39d1b3SJooyung Han class Transform1DKernel<InType, OutType, Dequantize, kernel_size, leftovers> {
90*5f39d1b3SJooyung Han  public:
Transform(const InType * in,const Dequantize & params,OutType * output)91*5f39d1b3SJooyung Han   static void Transform(const InType* in, const Dequantize& params,
92*5f39d1b3SJooyung Han                         OutType* output) {
93*5f39d1b3SJooyung Han #ifdef DEBUG
94*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
95*5f39d1b3SJooyung Han     std::cout << "Dequantize::Transform(" << std::string(typeid(InType).name())
96*5f39d1b3SJooyung Han               << ", " << std::string(typeid(OutType).name()) << ") -- "
97*5f39d1b3SJooyung Han               << kernel_size << "x" << leftovers << std::endl;
98*5f39d1b3SJooyung Han #endif
99*5f39d1b3SJooyung Han #else
100*5f39d1b3SJooyung Han     std::cerr << "FATAL: Dequantize::Transform not implemented." << std::endl;
101*5f39d1b3SJooyung Han     std::exit(1);
102*5f39d1b3SJooyung Han #endif
103*5f39d1b3SJooyung Han   }
104*5f39d1b3SJooyung Han };
105*5f39d1b3SJooyung Han 
106*5f39d1b3SJooyung Han template <typename InType, typename OutType, int kernel_size, int leftovers>
107*5f39d1b3SJooyung Han class Transform1DKernel<InType, OutType, Requantize, kernel_size, leftovers> {
108*5f39d1b3SJooyung Han  public:
Transform(const InType * in,const Requantize & params,OutType * output)109*5f39d1b3SJooyung Han   static void Transform(const InType* in, const Requantize& params,
110*5f39d1b3SJooyung Han                         OutType* output) {
111*5f39d1b3SJooyung Han #ifdef DEBUG
112*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
113*5f39d1b3SJooyung Han     std::cout << "Requantize::Transform(" << std::string(typeid(InType).name())
114*5f39d1b3SJooyung Han               << ", " << std::string(typeid(OutType).name()) << ") -- "
115*5f39d1b3SJooyung Han               << kernel_size << "x" << leftovers << std::endl;
116*5f39d1b3SJooyung Han #endif
117*5f39d1b3SJooyung Han #else
118*5f39d1b3SJooyung Han     std::cerr << "FATAL: Requantize::Transform not implemented." << std::endl;
119*5f39d1b3SJooyung Han     std::exit(1);
120*5f39d1b3SJooyung Han #endif
121*5f39d1b3SJooyung Han   }
122*5f39d1b3SJooyung Han };
123*5f39d1b3SJooyung Han 
124*5f39d1b3SJooyung Han template <typename InType, typename OutType, int kernel_size, int leftovers,
125*5f39d1b3SJooyung Han           typename Type>
126*5f39d1b3SJooyung Han class Transform1DKernel<InType, OutType, MinMax<Type>, kernel_size, leftovers> {
127*5f39d1b3SJooyung Han  public:
Transform(const InType * in,const MinMax<Type> & params,OutType * output)128*5f39d1b3SJooyung Han   static void Transform(const InType* in, const MinMax<Type>& params,
129*5f39d1b3SJooyung Han                         OutType* output) {
130*5f39d1b3SJooyung Han #ifdef DEBUG
131*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
132*5f39d1b3SJooyung Han     std::cout << "MinMax::Transform(" << std::string(typeid(InType).name())
133*5f39d1b3SJooyung Han               << ", " << std::string(typeid(OutType).name()) << ") -- "
134*5f39d1b3SJooyung Han               << kernel_size << "x" << leftovers << std::endl;
135*5f39d1b3SJooyung Han #endif
136*5f39d1b3SJooyung Han #else
137*5f39d1b3SJooyung Han     std::cerr << "FATAL: MinMax::Transform not implemented." << std::endl;
138*5f39d1b3SJooyung Han     std::exit(1);
139*5f39d1b3SJooyung Han #endif
140*5f39d1b3SJooyung Han   }
141*5f39d1b3SJooyung Han };
142*5f39d1b3SJooyung Han 
143*5f39d1b3SJooyung Han template <typename InType, typename OutType, int kernel_size, int leftovers,
144*5f39d1b3SJooyung Han           typename Type>
145*5f39d1b3SJooyung Han class Transform1DKernel<InType, OutType, BiasAdd<Type>, kernel_size,
146*5f39d1b3SJooyung Han                         leftovers> {
147*5f39d1b3SJooyung Han  public:
Transform(const InType * in,const BiasAdd<Type> & params,OutType * output)148*5f39d1b3SJooyung Han   static void Transform(const InType* in, const BiasAdd<Type>& params,
149*5f39d1b3SJooyung Han                         OutType* output) {
150*5f39d1b3SJooyung Han #ifdef DEBUG
151*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
152*5f39d1b3SJooyung Han     std::cout << "BiasAdd::Transform(" << std::string(typeid(InType).name())
153*5f39d1b3SJooyung Han               << ", " << std::string(typeid(OutType).name()) << ") -- "
154*5f39d1b3SJooyung Han               << kernel_size << "x" << leftovers << std::endl;
155*5f39d1b3SJooyung Han #endif
156*5f39d1b3SJooyung Han #else
157*5f39d1b3SJooyung Han     std::cerr << "FATAL: BiasAdd::Transform not implemented." << std::endl;
158*5f39d1b3SJooyung Han     std::exit(1);
159*5f39d1b3SJooyung Han #endif
160*5f39d1b3SJooyung Han   }
161*5f39d1b3SJooyung Han };
162*5f39d1b3SJooyung Han 
163*5f39d1b3SJooyung Han template <typename InType, typename OutType>
164*5f39d1b3SJooyung Han class Transform1DUtil<InType, OutType, Quantize> {
165*5f39d1b3SJooyung Han  public:
EstimateComputeCost(const Quantize & params)166*5f39d1b3SJooyung Han   static int EstimateComputeCost(const Quantize& params) {
167*5f39d1b3SJooyung Han     return params.count * 8;
168*5f39d1b3SJooyung Han   }
169*5f39d1b3SJooyung Han 
OffsetInput(const Quantize & params,const InType * input,int offset)170*5f39d1b3SJooyung Han   static const InType* OffsetInput(const Quantize& params, const InType* input,
171*5f39d1b3SJooyung Han                                    int offset) {
172*5f39d1b3SJooyung Han     return input + offset;
173*5f39d1b3SJooyung Han   }
174*5f39d1b3SJooyung Han 
OffsetOutput(const Quantize & params,OutType * output,int offset)175*5f39d1b3SJooyung Han   static OutType* OffsetOutput(const Quantize& params, OutType* output,
176*5f39d1b3SJooyung Han                                int offset) {
177*5f39d1b3SJooyung Han     return output + offset;
178*5f39d1b3SJooyung Han   }
179*5f39d1b3SJooyung Han };
180*5f39d1b3SJooyung Han 
181*5f39d1b3SJooyung Han template <typename InType, typename OutType>
182*5f39d1b3SJooyung Han class Transform1DUtil<InType, OutType, Requantize> {
183*5f39d1b3SJooyung Han  public:
EstimateComputeCost(const Requantize & params)184*5f39d1b3SJooyung Han   static int EstimateComputeCost(const Requantize& params) {
185*5f39d1b3SJooyung Han     return params.count * 12;
186*5f39d1b3SJooyung Han   }
187*5f39d1b3SJooyung Han 
OffsetInput(const Requantize & params,const InType * input,int offset)188*5f39d1b3SJooyung Han   static const InType* OffsetInput(const Requantize& params,
189*5f39d1b3SJooyung Han                                    const InType* input, int offset) {
190*5f39d1b3SJooyung Han     return input + offset;
191*5f39d1b3SJooyung Han   }
192*5f39d1b3SJooyung Han 
OffsetOutput(const Requantize & params,OutType * output,int offset)193*5f39d1b3SJooyung Han   static OutType* OffsetOutput(const Requantize& params, OutType* output,
194*5f39d1b3SJooyung Han                                int offset) {
195*5f39d1b3SJooyung Han     return output + offset;
196*5f39d1b3SJooyung Han   }
197*5f39d1b3SJooyung Han };
198*5f39d1b3SJooyung Han 
199*5f39d1b3SJooyung Han template <typename InType, typename OutType>
200*5f39d1b3SJooyung Han class Transform1DUtil<InType, OutType, Dequantize> {
201*5f39d1b3SJooyung Han  public:
EstimateComputeCost(const Dequantize & params)202*5f39d1b3SJooyung Han   static int EstimateComputeCost(const Dequantize& params) {
203*5f39d1b3SJooyung Han     return params.count * 12;
204*5f39d1b3SJooyung Han   }
205*5f39d1b3SJooyung Han 
OffsetInput(const Dequantize & params,const InType * input,int offset)206*5f39d1b3SJooyung Han   static const InType* OffsetInput(const Dequantize& params,
207*5f39d1b3SJooyung Han                                    const InType* input, int offset) {
208*5f39d1b3SJooyung Han     return input + offset;
209*5f39d1b3SJooyung Han   }
210*5f39d1b3SJooyung Han 
OffsetOutput(const Dequantize & params,OutType * output,int offset)211*5f39d1b3SJooyung Han   static OutType* OffsetOutput(const Dequantize& params, OutType* output,
212*5f39d1b3SJooyung Han                                int offset) {
213*5f39d1b3SJooyung Han     return output + offset;
214*5f39d1b3SJooyung Han   }
215*5f39d1b3SJooyung Han };
216*5f39d1b3SJooyung Han 
217*5f39d1b3SJooyung Han template <typename InType, typename OutType, typename MinMaxType>
218*5f39d1b3SJooyung Han class Transform1DUtil<InType, OutType, MinMax<MinMaxType>> {
219*5f39d1b3SJooyung Han  public:
EstimateComputeCost(const MinMax<MinMaxType> & params)220*5f39d1b3SJooyung Han   static int EstimateComputeCost(const MinMax<MinMaxType>& params) {
221*5f39d1b3SJooyung Han     return params.count * 4;
222*5f39d1b3SJooyung Han   }
223*5f39d1b3SJooyung Han 
OffsetInput(const MinMax<MinMaxType> & params,const InType * input,int offset)224*5f39d1b3SJooyung Han   static const InType* OffsetInput(const MinMax<MinMaxType>& params,
225*5f39d1b3SJooyung Han                                    const InType* input, int offset) {
226*5f39d1b3SJooyung Han     return input + offset;
227*5f39d1b3SJooyung Han   }
228*5f39d1b3SJooyung Han 
OffsetOutput(const MinMax<MinMaxType> & params,OutType * output,int offset)229*5f39d1b3SJooyung Han   static OutType* OffsetOutput(const MinMax<MinMaxType>& params,
230*5f39d1b3SJooyung Han                                OutType* output, int offset) {
231*5f39d1b3SJooyung Han     return output + offset;
232*5f39d1b3SJooyung Han   }
233*5f39d1b3SJooyung Han };
234*5f39d1b3SJooyung Han 
235*5f39d1b3SJooyung Han }  // namespace meta
236*5f39d1b3SJooyung Han }  // namespace gemmlowp
237*5f39d1b3SJooyung Han 
238*5f39d1b3SJooyung Han #ifdef GEMMLOWP_NEON_32
239*5f39d1b3SJooyung Han #include "transform_kernels_arm_32.h"
240*5f39d1b3SJooyung Han #elif defined(GEMMLOWP_NEON_64)
241*5f39d1b3SJooyung Han #include "transform_kernels_arm_64.h"
242*5f39d1b3SJooyung Han #endif
243*5f39d1b3SJooyung Han 
244*5f39d1b3SJooyung Han #endif  // GEMMLOWP_META_TRANSFORM_KERNELS_H_
245