// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*5f39d1b3SJooyung Han 
15*5f39d1b3SJooyung Han #ifndef GEMMLOWP_META_QUANTIZED_MUL_KERNELS_H_
16*5f39d1b3SJooyung Han #define GEMMLOWP_META_QUANTIZED_MUL_KERNELS_H_
17*5f39d1b3SJooyung Han 
#include <cstdlib>
#include <iostream>
#include <typeinfo>

#include "base.h"
#include "streams.h"
23*5f39d1b3SJooyung Han 
24*5f39d1b3SJooyung Han namespace gemmlowp {
25*5f39d1b3SJooyung Han namespace meta {
26*5f39d1b3SJooyung Han 
// Kernel-parameter type for the "quantized, static, preprocessed" multiply
// kernels. In this header it is the kernel type argument of a MulKernel
// specialization and of FusedKernelParams, and its fields are read back as
// `params.kernel.*`.
//
// NOTE(review): what each field means is not visible in this header. The names
// suggest a fixed-point requantization step (multiply, add a rounding term,
// shift) over `count` elements -- confirm against the architecture-specific
// kernels before relying on that.
struct QuantizedStaticPreprocessed {
  int multiplicative_offset;
  int rounding_offset;
  int shift;
  int count;
};
34*5f39d1b3SJooyung Han 
35*5f39d1b3SJooyung Han template <typename InType, typename OutType, int m, int n, int k>
36*5f39d1b3SJooyung Han class MulKernel<InType, OutType, QuantizedStaticPreprocessed, RowMajor, m, n,
37*5f39d1b3SJooyung Han                 k> {
38*5f39d1b3SJooyung Han  public:
39*5f39d1b3SJooyung Han   typedef FusedKernelParams<QuantizedStaticPreprocessed, RowMajor> FusedKernel;
40*5f39d1b3SJooyung Han 
Multiply(const InType * lhs,const InType *,const FusedKernel & params,OutType * result)41*5f39d1b3SJooyung Han   static void Multiply(const InType* lhs, const InType*,
42*5f39d1b3SJooyung Han                        const FusedKernel& params, OutType* result) {
43*5f39d1b3SJooyung Han #ifdef DEBUG
44*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
45*5f39d1b3SJooyung Han     std::cout << "MulQSPR(" << typeid(InType).name() << ", "
46*5f39d1b3SJooyung Han               << typeid(OutType).name() << ")::Multiply() -- " << m << "x" << n
47*5f39d1b3SJooyung Han               << "x" << k << std::endl;
48*5f39d1b3SJooyung Han #endif
49*5f39d1b3SJooyung Han #else
50*5f39d1b3SJooyung Han     if (m != 0 && n != 0) {
51*5f39d1b3SJooyung Han       std::cerr << "FATAL: QuantizedStaticPreprocessed_RowMajor::Multiply not "
52*5f39d1b3SJooyung Han                 << "implemented." << std::endl;
53*5f39d1b3SJooyung Han       std::exit(1);
54*5f39d1b3SJooyung Han     }
55*5f39d1b3SJooyung Han #endif
56*5f39d1b3SJooyung Han   }
57*5f39d1b3SJooyung Han 
58*5f39d1b3SJooyung Han #ifdef DEBUG
59*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const FusedKernel & params)60*5f39d1b3SJooyung Han   static void Debug(const FusedKernel& params) {
61*5f39d1b3SJooyung Han     std::cout << "MulQSPR(" << typeid(InType).name() << ", "
62*5f39d1b3SJooyung Han               << typeid(OutType).name() << ") -- " << m << "x" << n << "x" << k
63*5f39d1b3SJooyung Han               << std::endl;
64*5f39d1b3SJooyung Han     std::cout << "  params:" << std::endl;
65*5f39d1b3SJooyung Han     std::cout << "    kernel.multiplicative_offset: "
66*5f39d1b3SJooyung Han               << params.kernel.multiplicative_offset << std::endl;
67*5f39d1b3SJooyung Han     std::cout << "    kernel.rounding_offset: " << params.kernel.rounding_offset
68*5f39d1b3SJooyung Han               << std::endl;
69*5f39d1b3SJooyung Han     std::cout << "    kernel.shift: " << params.kernel.shift << std::endl;
70*5f39d1b3SJooyung Han     std::cout << "    kernel.count: " << params.kernel.count << std::endl;
71*5f39d1b3SJooyung Han     std::cout << "    output_stream.stride: " << params.output_stream.stride
72*5f39d1b3SJooyung Han               << std::endl;
73*5f39d1b3SJooyung Han   }
74*5f39d1b3SJooyung Han #endif
75*5f39d1b3SJooyung Han #endif
76*5f39d1b3SJooyung Han };
77*5f39d1b3SJooyung Han 
// Kernel-parameter type for the "quantized, static, preprocessed, as int32"
// multiply kernels. In this header it is the kernel type argument of a
// MulKernel specialization and of FusedKernelParams, and its single field is
// read back as `params.kernel.count`.
//
// NOTE(review): what `count` measures is not visible in this header --
// confirm against the architecture-specific kernels.
struct QuantizedStaticPreprocessedAsInt32 {
  int count;
};
82*5f39d1b3SJooyung Han 
83*5f39d1b3SJooyung Han template <typename InType, typename OutType, int m, int n, int k>
84*5f39d1b3SJooyung Han class MulKernel<InType, OutType, QuantizedStaticPreprocessedAsInt32, RowMajor,
85*5f39d1b3SJooyung Han                 m, n, k> {
86*5f39d1b3SJooyung Han  public:
87*5f39d1b3SJooyung Han   typedef FusedKernelParams<QuantizedStaticPreprocessedAsInt32, RowMajor>
88*5f39d1b3SJooyung Han       FusedKernel;
89*5f39d1b3SJooyung Han 
Multiply(const InType * lhs,const InType *,const FusedKernel & params,OutType * result)90*5f39d1b3SJooyung Han   static void Multiply(const InType* lhs, const InType*,
91*5f39d1b3SJooyung Han                        const FusedKernel& params, OutType* result) {
92*5f39d1b3SJooyung Han #ifdef DEBUG
93*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
94*5f39d1b3SJooyung Han     std::cout << "MulQSPI32R(" << typeid(InType).name() << ", "
95*5f39d1b3SJooyung Han               << typeid(OutType).name() << ")::Multiply() -- " << m << "x" << n
96*5f39d1b3SJooyung Han               << "x" << k << std::endl;
97*5f39d1b3SJooyung Han #endif
98*5f39d1b3SJooyung Han #else
99*5f39d1b3SJooyung Han     if (m != 0 && n != 0) {
100*5f39d1b3SJooyung Han       std::cerr << "FATAL: QuantizedStaticPreprocessedAsInt32_RowMajor::"
101*5f39d1b3SJooyung Han                 << "Multiply not implemented." << std::endl;
102*5f39d1b3SJooyung Han       std::exit(1);
103*5f39d1b3SJooyung Han     }
104*5f39d1b3SJooyung Han #endif
105*5f39d1b3SJooyung Han   }
106*5f39d1b3SJooyung Han 
107*5f39d1b3SJooyung Han #ifdef DEBUG
108*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const FusedKernel & params)109*5f39d1b3SJooyung Han   static void Debug(const FusedKernel& params) {
110*5f39d1b3SJooyung Han     std::cout << "MulQSPI32R(" << typeid(InType).name() << ", "
111*5f39d1b3SJooyung Han               << typeid(OutType).name() << ") -- " << m << "x" << n << "x" << k
112*5f39d1b3SJooyung Han               << std::endl;
113*5f39d1b3SJooyung Han     std::cout << "  params:" << std::endl;
114*5f39d1b3SJooyung Han     std::cout << "    kernel.count: " << params.kernel.count << std::endl;
115*5f39d1b3SJooyung Han     std::cout << "    output_stream.stride: " << params.output_stream.stride
116*5f39d1b3SJooyung Han               << std::endl;
117*5f39d1b3SJooyung Han   }
118*5f39d1b3SJooyung Han #endif
119*5f39d1b3SJooyung Han #endif
120*5f39d1b3SJooyung Han };
121*5f39d1b3SJooyung Han 
// Kernel-parameter type for the "quantized, static, preprocessed, as float"
// multiply kernels. In this header it is the kernel type argument of a
// MulKernel specialization and of FusedKernelParams, and its fields are read
// back as `params.kernel.count` and `params.kernel.scale`.
//
// NOTE(review): the exact role of `count` and `scale` is not visible in this
// header; `scale` is presumably the factor applied when converting the integer
// result to float -- confirm against the architecture-specific kernels.
struct QuantizedStaticPreprocessedAsFloat {
  int count;
  float scale;
};
127*5f39d1b3SJooyung Han 
128*5f39d1b3SJooyung Han template <typename InType, typename OutType, int m, int n, int k>
129*5f39d1b3SJooyung Han class MulKernel<InType, OutType, QuantizedStaticPreprocessedAsFloat, RowMajor,
130*5f39d1b3SJooyung Han                 m, n, k> {
131*5f39d1b3SJooyung Han  public:
132*5f39d1b3SJooyung Han   typedef FusedKernelParams<QuantizedStaticPreprocessedAsFloat, RowMajor>
133*5f39d1b3SJooyung Han       FusedKernel;
134*5f39d1b3SJooyung Han 
Multiply(const InType * lhs,const InType *,const FusedKernel & params,OutType * result)135*5f39d1b3SJooyung Han   static void Multiply(const InType* lhs, const InType*,
136*5f39d1b3SJooyung Han                        const FusedKernel& params, OutType* result) {
137*5f39d1b3SJooyung Han #ifdef DEBUG
138*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
139*5f39d1b3SJooyung Han     std::cout << "MulQSPFR(" << typeid(InType).name() << ", "
140*5f39d1b3SJooyung Han               << typeid(OutType).name() << ")::Multiply() -- " << m << "x" << n
141*5f39d1b3SJooyung Han               << "x" << k << std::endl;
142*5f39d1b3SJooyung Han #endif
143*5f39d1b3SJooyung Han #else
144*5f39d1b3SJooyung Han     if (m != 0 && n != 0) {
145*5f39d1b3SJooyung Han       std::cerr << "FATAL: QuantizedStaticPreprocessedAsFloat_RowMajor::"
146*5f39d1b3SJooyung Han                 << "Multiply not implemented." << std::endl;
147*5f39d1b3SJooyung Han       std::exit(1);
148*5f39d1b3SJooyung Han     }
149*5f39d1b3SJooyung Han #endif
150*5f39d1b3SJooyung Han   }
151*5f39d1b3SJooyung Han 
152*5f39d1b3SJooyung Han #ifdef DEBUG
153*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const FusedKernel & params)154*5f39d1b3SJooyung Han   static void Debug(const FusedKernel& params) {
155*5f39d1b3SJooyung Han     std::cout << "MulQSPFR(" << typeid(InType).name() << ", "
156*5f39d1b3SJooyung Han               << typeid(OutType).name() << ") -- " << m << "x" << n << "x" << k
157*5f39d1b3SJooyung Han               << std::endl;
158*5f39d1b3SJooyung Han     std::cout << "  params:" << std::endl;
159*5f39d1b3SJooyung Han     std::cout << "    kernel.count: " << params.kernel.count << std::endl;
160*5f39d1b3SJooyung Han     std::cout << "    kernel.scale: " << params.kernel.scale << std::endl;
161*5f39d1b3SJooyung Han     std::cout << "    output_stream.stride: " << params.output_stream.stride
162*5f39d1b3SJooyung Han               << std::endl;
163*5f39d1b3SJooyung Han   }
164*5f39d1b3SJooyung Han #endif
165*5f39d1b3SJooyung Han #endif
166*5f39d1b3SJooyung Han };
167*5f39d1b3SJooyung Han 
168*5f39d1b3SJooyung Han }  // namespace meta
169*5f39d1b3SJooyung Han }  // namespace gemmlowp
170*5f39d1b3SJooyung Han 
171*5f39d1b3SJooyung Han #ifdef GEMMLOWP_NEON_32
172*5f39d1b3SJooyung Han #include "quantized_mul_kernels_arm_32.h"
173*5f39d1b3SJooyung Han #elif defined(GEMMLOWP_NEON_64)
174*5f39d1b3SJooyung Han #include "quantized_mul_kernels_arm_64.h"
175*5f39d1b3SJooyung Han #endif
176*5f39d1b3SJooyung Han 
177*5f39d1b3SJooyung Han #endif  // GEMMLOWP_META_QUANTIZED_MUL_KERNELS_H_
178