xref: /aosp_15_r20/external/gemmlowp/internal/kernel_default.h (revision 5f39d1b313f0528e11bae88b3029b54b9e1033e7)
1*5f39d1b3SJooyung Han // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2*5f39d1b3SJooyung Han //
3*5f39d1b3SJooyung Han // Licensed under the Apache License, Version 2.0 (the "License");
4*5f39d1b3SJooyung Han // you may not use this file except in compliance with the License.
5*5f39d1b3SJooyung Han // You may obtain a copy of the License at
6*5f39d1b3SJooyung Han //
7*5f39d1b3SJooyung Han //     http://www.apache.org/licenses/LICENSE-2.0
8*5f39d1b3SJooyung Han //
9*5f39d1b3SJooyung Han // Unless required by applicable law or agreed to in writing, software
10*5f39d1b3SJooyung Han // distributed under the License is distributed on an "AS IS" BASIS,
11*5f39d1b3SJooyung Han // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*5f39d1b3SJooyung Han // See the License for the specific language governing permissions and
13*5f39d1b3SJooyung Han // limitations under the License.
14*5f39d1b3SJooyung Han 
15*5f39d1b3SJooyung Han // kernel_default.h: Chooses default GEMM and GEMV kernels for the
16*5f39d1b3SJooyung Han // host platform.
17*5f39d1b3SJooyung Han 
18*5f39d1b3SJooyung Han #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
19*5f39d1b3SJooyung Han #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
20*5f39d1b3SJooyung Han 
21*5f39d1b3SJooyung Han #include "../public/bit_depth.h"
22*5f39d1b3SJooyung Han #include "common.h"
23*5f39d1b3SJooyung Han #include "kernel.h"
24*5f39d1b3SJooyung Han #include "kernel_reference.h"
25*5f39d1b3SJooyung Han 
namespace gemmlowp {

// Compile-time lookup table from a triple of operand-range flags to a kernel
// type. The primary template is deliberately empty: a kernel is attached to a
// given flag combination by explicitly specializing this template (see
// GEMMLOWP_SET_DEFAULT_KERNEL), and the partial specializations below provide
// fallbacks between combinations.
//
// The flags are computed by DefaultKernel:
//   MaxProductIsLessThan4096: LhsRange::kMaxValue * RhsRange::kMaxValue < 4096.
//   IsUnsigned:               LhsRange::kMinValue >= 0.
//   LhsNonZero:               LhsRange::kMinValue > 0, or the LHS range lies
//                             within (-128, 127].
//
// Deferred is an implementation detail and must be left at its default. It
// exists only so that every fallback below, including the tie-break, can be
// written as a *partial* specialization. Partial specializations keep their
// base class dependent, so nothing is instantiated until DefaultKernel is
// actually used, i.e. after all kernels have been registered.
template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero,
          typename Deferred = void>
struct DefaultKernelImpl {};

// Fallback: a kernel exploiting MaxProductIsLessThan4096 was requested but
// none is registered, so use the kernel that does not rely on that property.
template <bool LhsNonZero, typename Deferred>
struct DefaultKernelImpl<true, true, LhsNonZero, Deferred>
    : DefaultKernelImpl<false, true, LhsNonZero, Deferred> {};

// Fallback: a kernel exploiting LhsNonZero was requested but none is
// registered, so use the kernel that does not rely on that property.
template <bool MaxProductIsLessThan4096, typename Deferred>
struct DefaultKernelImpl<MaxProductIsLessThan4096, true, true, Deferred>
    : DefaultKernelImpl<MaxProductIsLessThan4096, true, false, Deferred> {};

// Tie-break for <true, true, true>. Both fallbacks above match that
// combination and neither is more specialized than the other, so without this
// specialization the combination is an ambiguous-specialization error whenever
// no kernel has been registered for it. This one is more specialized than
// both, which resolves the ambiguity. We give up MaxProductIsLessThan4096
// first and keep LhsNonZero; the LhsNonZero fallback above then applies if no
// such kernel is registered either.
template <typename Deferred>
struct DefaultKernelImpl<true, true, true, Deferred>
    : DefaultKernelImpl<false, true, true, Deferred> {};

// Default kernel for a given BitDepthParams.
//
// BitDepthParams must expose LhsRange and RhsRange types, each providing
// integral constants kMinValue and kMaxValue. The three flags documented on
// DefaultKernelImpl are derived from those constants and used to select the
// kernel this struct inherits from.
template <typename BitDepthParams>
struct DefaultKernel
    : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
                             BitDepthParams::RhsRange::kMaxValue <
                         4096),
                        (BitDepthParams::LhsRange::kMinValue >= 0),
                        (BitDepthParams::LhsRange::kMinValue > 0 ||
                         (BitDepthParams::LhsRange::kMaxValue <= 127 &&
                          BitDepthParams::LhsRange::kMinValue > -128))> {};

}  // end namespace gemmlowp
57*5f39d1b3SJooyung Han 
// Registers KernelType as the default kernel for one combination of the three
// DefaultKernelImpl flags, by emitting an explicit specialization of
// gemmlowp::DefaultKernelImpl that derives from KernelType.
//
// The expansion opens namespace gemmlowp itself, so invoke this at global
// scope; KernelType is looked up from inside that namespace.
#define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductLt4096Flag, IsUnsignedFlag,  \
                                    LhsNonZeroFlag, KernelType)            \
  namespace gemmlowp {                                                     \
  template <>                                                              \
  struct DefaultKernelImpl<MaxProductLt4096Flag, IsUnsignedFlag,           \
                           LhsNonZeroFlag> : KernelType {};                \
  }
65*5f39d1b3SJooyung Han 
66*5f39d1b3SJooyung Han // User-provided int8 inputs is only supported in the NEON path currently.
67*5f39d1b3SJooyung Han #if defined GEMMLOWP_NEON_32
68*5f39d1b3SJooyung Han #include "kernel_neon.h"
69*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_32_Kernel12x4Depth2)
70*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(true, true, false,
71*5f39d1b3SJooyung Han                             NEON_32_Kernel12x4Depth2Assuming12BitProducts)
72*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
73*5f39d1b3SJooyung Han                             NEON_32bit_GEMM_Int8Operands_LhsNonzero)
74*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
75*5f39d1b3SJooyung Han                             NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
76*5f39d1b3SJooyung Han #elif defined GEMMLOWP_NEON_64
77*5f39d1b3SJooyung Han #include "kernel_neon.h"
78*5f39d1b3SJooyung Han #if defined GEMMLOWP_DOTPROD_KERNEL
79*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false,
80*5f39d1b3SJooyung Han                             NEON_64_Kernel12x8Depth4_dotprod)
81*5f39d1b3SJooyung Han #else
82*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_64_Kernel12x8Depth2)
83*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
84*5f39d1b3SJooyung Han                             NEON_64bit_GEMM_Int8Operands_LhsNonzero)
85*5f39d1b3SJooyung Han #endif
86*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
87*5f39d1b3SJooyung Han                             NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
88*5f39d1b3SJooyung Han #elif defined(GEMMLOWP_MSA)
89*5f39d1b3SJooyung Han #include "kernel_msa.h"
90*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, MSA_Kernel12x8Depth2)
91*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, MSA_GEMM_Int8Operands_LhsNonzero)
92*5f39d1b3SJooyung Han #elif defined GEMMLOWP_SSE4_32
93*5f39d1b3SJooyung Han #include "kernel_sse.h"
94*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_32_Kernel4x4Depth2)
95*5f39d1b3SJooyung Han #elif defined GEMMLOWP_SSE4_64
96*5f39d1b3SJooyung Han #include "kernel_sse.h"
97*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_64_Kernel12x4Depth2)
98*5f39d1b3SJooyung Han #elif defined GEMMLOWP_AVX2_64
99*5f39d1b3SJooyung Han #include "kernel_avx.h"
100*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, AVX2_64_Kernel24x8Depth2)
101*5f39d1b3SJooyung Han #else
102*5f39d1b3SJooyung Han #include "kernel_reference.h"
103*5f39d1b3SJooyung Han namespace gemmlowp {
104*5f39d1b3SJooyung Han typedef ReferenceKernel<KernelFormat<
105*5f39d1b3SJooyung Han     KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>,
106*5f39d1b3SJooyung Han     KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > >
107*5f39d1b3SJooyung Han     DefaultReferenceKernel;
108*5f39d1b3SJooyung Han }
109*5f39d1b3SJooyung Han GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, DefaultReferenceKernel)
110*5f39d1b3SJooyung Han #endif
111*5f39d1b3SJooyung Han 
112*5f39d1b3SJooyung Han #endif  // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
113