1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // kernel_default.h: Chooses default GEMM and GEMV kernels for the 16 // host platform. 17 18 #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 19 #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 20 21 #include "../public/bit_depth.h" 22 #include "common.h" 23 #include "kernel.h" 24 #include "kernel_reference.h" 25 26 namespace gemmlowp { 27 28 template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero> 29 struct DefaultKernelImpl {}; 30 31 // Partial specialization implementing the logic that if we want to use 32 // a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we 33 // fall back to a generic kernel not taking advantage of 34 // MaxProductIsLessThan4096. 35 template <bool LhsNonZero> 36 struct DefaultKernelImpl<true, true, LhsNonZero> 37 : DefaultKernelImpl<false, true, LhsNonZero> {}; 38 39 // Partial specialization implementing the logic that if we want to use 40 // a kernel for LhsNonZero but do not have such a kernel, then we fall 41 // back to a generic kernel not taking advantage of LhsNonZero. 42 template <bool MaxProductIsLessThan4096> 43 struct DefaultKernelImpl<MaxProductIsLessThan4096, true, true> 44 : DefaultKernelImpl<MaxProductIsLessThan4096, true, false> {}; 45 46 template <typename BitDepthParams> 47 struct DefaultKernel 48 : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue * 49 BitDepthParams::RhsRange::kMaxValue < 50 4096), 51 (BitDepthParams::LhsRange::kMinValue >= 0), 52 (BitDepthParams::LhsRange::kMinValue > 0 || 53 (BitDepthParams::LhsRange::kMaxValue <= 127 && 54 BitDepthParams::LhsRange::kMinValue > -128))> {}; 55 56 } // end namespace gemmlowp 57 58 #define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, IsUnsigned, \ 59 LhsAlwaysNonZero, Kernel) \ 60 namespace gemmlowp { \ 61 template <> \ 62 struct DefaultKernelImpl<MaxProductIsLessThan4096, IsUnsigned, \ 63 LhsAlwaysNonZero> : Kernel {}; \ 64 } 65 66 // User-provided int8 inputs is only supported in the NEON path currently. 67 #if defined GEMMLOWP_NEON_32 68 #include "kernel_neon.h" 69 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_32_Kernel12x4Depth2) 70 GEMMLOWP_SET_DEFAULT_KERNEL(true, true, false, 71 NEON_32_Kernel12x4Depth2Assuming12BitProducts) 72 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, 73 NEON_32bit_GEMM_Int8Operands_LhsNonzero) 74 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true, 75 NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs) 76 #elif defined GEMMLOWP_NEON_64 77 #include "kernel_neon.h" 78 #if defined GEMMLOWP_DOTPROD_KERNEL 79 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, 80 NEON_64_Kernel12x8Depth4_dotprod) 81 #else 82 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_64_Kernel12x8Depth2) 83 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, 84 NEON_64bit_GEMM_Int8Operands_LhsNonzero) 85 #endif 86 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true, 87 NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs) 88 #elif defined(GEMMLOWP_MSA) 89 #include "kernel_msa.h" 90 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, MSA_Kernel12x8Depth2) 91 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, MSA_GEMM_Int8Operands_LhsNonzero) 92 #elif defined GEMMLOWP_SSE4_32 93 #include "kernel_sse.h" 94 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_32_Kernel4x4Depth2) 95 #elif defined GEMMLOWP_SSE4_64 96 #include "kernel_sse.h" 97 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_64_Kernel12x4Depth2) 98 #elif defined GEMMLOWP_AVX2_64 99 #include "kernel_avx.h" 100 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, AVX2_64_Kernel24x8Depth2) 101 #else 102 #include "kernel_reference.h" 103 namespace gemmlowp { 104 typedef ReferenceKernel<KernelFormat< 105 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, 106 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > 107 DefaultReferenceKernel; 108 } 109 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, DefaultReferenceKernel) 110 #endif 111 112 #endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 113