/*
 * Copyright (c) 2019-2020, 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifdef __aarch64__

#include "arm_gemm.hpp"

#include "kernels/a64_gemm_s16_8x12.hpp"
#include "kernels/a64_gemm_s8_4x4.hpp"
#include "kernels/a64_gemm_s8_8x12.hpp"
#include "kernels/a64_hybrid_s8qa_dot_4x16.hpp"
#include "kernels/a64_hybrid_s8qa_mmla_4x16.hpp"
#include "kernels/a64_hybrid_s8qs_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8qs_mmla_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_mmla_6x16.hpp"
#include "kernels/a64_interleaved_s8s32_mmla_8x12.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp"

#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
#include "kernels/sme2_gemv_s8qa_dot_16VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SME2

#include "kernels/sve_hybrid_s8qa_dot_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp"
#include "kernels/sve_interleaved_s8s32_dot_8x3VL.hpp"
#include "kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp"
#include "kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SVE

#include "gemm_hybrid_indirect.hpp"
#include "gemm_hybrid_quantized.hpp"
#include "gemm_hybrid_quantized_inline.hpp"
#include "gemm_interleaved.hpp"
#include "gemv_pretransposed.hpp"
#include "quantize_wrapper.hpp"
#include "utils.hpp"

namespace arm_gemm {

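// Candidate implementations for quantized int8 GEMM. Each entry names a
// method and kernel, then supplies three lambdas: an "is supported" check
// against the arguments and requantization parameters, an optional
// "is recommended" check (or, via with_estimate(), a cycle estimate the
// selector can use to rank supported candidates), and a factory that
// instantiates the implementation. Entries are ordered roughly from most
// to least specialized, ending with a DEFAULT terminator.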
static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods[] =
{
#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
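// SME2 GEMV kernel: only for a single row (M == 1), a single batch, and
// direct (non-indirect) input with asymmetric hybrid quantization.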
{
    GemmMethod::GEMM_HYBRID,
    "sme2_gemv_s8qa_dot_16VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && quant_hybrid_asymmetric(qp) && args._Msize == 1 && !args._indirect_input && args._nbatches == 1; },
    nullptr,
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemvPretransposed<cls_sme2_gemv_s8qa_dot_16VL, int8_t, int8_t, Requantize32>(args, qp); }
},
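// SME2 outer-product (MOPA) kernels: single-threaded, requantizing inline
// with no merge step, and only accepting requantization without left
// shifts. The three tile shapes (1VLx4VL, 4VLx1VL, 2VLx2VL) suit
// different M:N aspect ratios; the recommendation lambdas pick a shape
// from how M or N divides into vector-length (VL) multiples.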
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_1VLx4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
                               return args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_1VLx4VL, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_4VLx1VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
                               return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_4VLx1VL, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_2VLx2VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    nullptr,
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_2VLx2VL, int8_t, int8_t>(args, qp); }
},
#endif // ARM_COMPUTE_ENABLE_SME2
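// SVE2/SVE kernels. MMLA variants (requiring the I8MM feature) come
// first, then dot-product variants. "s8qs" kernels pair with symmetric
// and "s8qa" with asymmetric hybrid quantization; "s8s32" kernels
// accumulate to int32, with requantization handled by the wrapping
// template.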
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qa_mmla_4x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_asymmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qs_mmla_6x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_symmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_interleaved_s8s32_mmla_8x3VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_hybrid_s8s32_mmla_6x4VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
),
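// Small-K hybrid kernel: handles K <= 64 in a single pass without
// indirect input; not recommended once I8MM kernels are available.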
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "sve_smallK_hybrid_s8s32_dot_8x1VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int8_t>(args, qp); }
},
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qs_dot_6x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qa_dot_4x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8s32_dot_6x4VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_interleaved_s8s32_dot_8x3VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>4); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
),
#endif // ARM_COMPUTE_ENABLE_SVE
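// Plain AArch64 NEON kernels, mirroring the SVE ordering: I8MM MMLA
// kernels first, then small-K and dot-product kernels, then generic
// 8-bit and 16-bit fallbacks.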
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qa_mmla_4x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qs_mmla_6x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_interleaved_s8s32_mmla_8x12",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_hybrid_s8s32_mmla_6x16",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
),
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "a64_smallK_hybrid_s8s32_dot_8x4",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize<=32) && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_8x4, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "a64_smallK_hybrid_s8s32_dot_6x4",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize>32) && (args._Ksize<=64) && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_6x4, int8_t, int8_t>(args, qp); }
},
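// 16-bit kernel: widens the int8 operands to int16; recommended only on
// Cortex-A53, and only where M is large (> 28) or leaves an awkward
// remainder modulo the 8-row tile.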
{
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s16_8x12",
    nullptr,
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() == CPUModel::A53 && ((args._Msize > 28) || ((args._Msize % 8) > 4)); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s16_8x12, int8_t, int8_t>(args, qp); }
},
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qs_dot_6x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qa_dot_4x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8s32_dot_6x16",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s8_8x12",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s8_4x4",
    nullptr,
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>(args, qp); }
),
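// Last-resort fallbacks: the quantize wrapper runs a plain int8/int32
// GEMM and requantizes the result as a separate step (direct input
// only, never recommended); the empty DEFAULT entry terminates the list.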
{
    GemmMethod::QUANTIZE_WRAPPER,
    "quantized_wrapper",
    [](const GemmArgs &args, const Requantize32 &) { return !args._indirect_input; },
    [](const GemmArgs &, const Requantize32 &) { return false; },
    [](const GemmArgs &args, const Requantize32 &qp) { return new QuantizeWrapper<int8_t, int8_t, int32_t>(args, qp); }
},
{
    GemmMethod::DEFAULT,
    "",
    nullptr,
    nullptr,
    nullptr
}
};

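// Hook this table into the generic implementation-selection machinery.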
template<>
const GemmImplementation<int8_t, int8_t, Requantize32> *gemm_implementation_list<int8_t, int8_t, Requantize32>() {
    return gemm_qint8_methods;
}

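// Explicit instantiation of the public interface for the
// int8/int8/Requantize32 type combination.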
template UniqueGemmCommon<int8_t, int8_t> gemm<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
template bool has_opt_gemm<int8_t, int8_t, Requantize32>(WeightFormat &weight_format, const GemmArgs &args, const Requantize32 &os);
template std::vector<KernelDescription> get_compatible_kernels<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);

} // namespace arm_gemm

#endif // __aarch64__