/*
 * Copyright (c) 2019-2020, 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifdef __aarch64__

#include "arm_gemm.hpp"

#include "kernels/a64_gemm_s16_8x12.hpp"
#include "kernels/a64_gemm_s8_4x4.hpp"
#include "kernels/a64_gemm_s8_8x12.hpp"
#include "kernels/a64_hybrid_s8qa_dot_4x16.hpp"
#include "kernels/a64_hybrid_s8qa_mmla_4x16.hpp"
#include "kernels/a64_hybrid_s8qs_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8qs_mmla_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_mmla_6x16.hpp"
#include "kernels/a64_interleaved_s8s32_mmla_8x12.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp"

#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
#include "kernels/sme2_gemv_s8qa_dot_16VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SME2

#include "kernels/sve_hybrid_s8qa_dot_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp"
#include "kernels/sve_interleaved_s8s32_dot_8x3VL.hpp"
#include "kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp"
#include "kernels/sve_smallK_hybrid_s8s32_dot_8x1VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SVE

#include "gemm_hybrid_indirect.hpp"
#include "gemm_hybrid_quantized.hpp"
#include "gemm_hybrid_quantized_inline.hpp"
#include "gemm_interleaved.hpp"
#include "gemv_pretransposed.hpp"
#include "quantize_wrapper.hpp"
#include "utils.hpp"

namespace arm_gemm {
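// Candidate list for quantized int8 GEMM.  Each entry carries a GEMM
// method, a kernel name, a predicate deciding whether the kernel
// supports the given GemmArgs/Requantize32, either a boolean
// "recommended" predicate or (via with_estimate) a cycle estimate
// used to rank candidates, and a factory that instantiates the
// implementation.  The selection code walks the list in order, so
// entries appear roughly fastest-first: SME2, then SVE, then plain
// AArch64, ending with generic fallbacks and a DEFAULT sentinel.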
static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods[] =
{
#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
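// SME2 kernels: usable only when the CPU reports SME2.  The mopa
// (outer-product) kernels run single-threaded and only handle
// requantization without left shift; the gemv kernel covers the
// M == 1, single-batch case.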
{
    GemmMethod::GEMM_HYBRID,
    "sme2_gemv_s8qa_dot_16VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && quant_hybrid_asymmetric(qp) && args._Msize == 1 && !args._indirect_input && args._nbatches == 1; },
    nullptr,
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemvPretransposed<cls_sme2_gemv_s8qa_dot_16VL, int8_t, int8_t, Requantize32>(args, qp); }
},
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_1VLx4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
                                                     return args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_1VLx4VL, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_4VLx1VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
                                                     return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_4VLx1VL, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_INTERLEAVED,
    "sme2_interleaved_nomerge_s8q_mopa_2VLx2VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && args._maxthreads == 1 && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0))); },
    nullptr,
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_2VLx2VL, int8_t, int8_t>(args, qp); }
},
#endif // ARM_COMPUTE_ENABLE_SME2
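// SVE kernels, in preference order: MMLA variants (needing SVE I8MM),
// then SVE2 dot-product kernels for the hybrid-quantized cases, then
// plain SVE dot-product kernels.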
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qa_mmla_4x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_asymmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qs_mmla_6x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_symmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_interleaved_s8s32_mmla_8x3VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_hybrid_s8s32_mmla_6x4VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
),
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "sve_smallK_hybrid_s8s32_dot_8x1VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && args._Ksize<=64 && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_sve_smallK_hybrid_s8s32_dot_8x1VL, int8_t, int8_t>(args, qp); }
},
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qs_dot_6x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8qa_dot_4x4VL",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "sve_hybrid_s8s32_dot_6x4VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "sve_interleaved_s8s32_dot_8x3VL",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>4); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
),
#endif // ARM_COMPUTE_ENABLE_SVE
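// Plain AArch64 (Advanced SIMD) kernels: I8MM MMLA variants first,
// then dot-product kernels, with non-dot-product fallbacks at the end.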
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qa_mmla_4x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qs_mmla_6x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_interleaved_s8s32_mmla_8x12",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_hybrid_s8s32_mmla_6x16",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
),
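// Small-K hybrid quantized kernels: dot-product kernels specialised
// for K <= 32 and 32 < K <= 64 respectively; they need N to be a
// multiple of 4, do not support indirect input, and are only
// recommended on CPUs without I8MM.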
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "a64_smallK_hybrid_s8s32_dot_8x4",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize<=32) && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_8x4, int8_t, int8_t>(args, qp); }
},
{
    GemmMethod::GEMM_HYBRID_QUANTIZED,
    "a64_smallK_hybrid_s8s32_dot_6x4",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize>32) && (args._Ksize<=64) && !args._indirect_input; },
    [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_6x4, int8_t, int8_t>(args, qp); }
},
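// 16-bit widening interleaved kernel: always supported, but only
// recommended on Cortex-A53, and then only for sufficiently large or
// awkwardly shaped M.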
{
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s16_8x12",
    nullptr,
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() == CPUModel::A53 && ((args._Msize > 28) || ((args._Msize % 8) > 4)); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s16_8x12, int8_t, int8_t>(args, qp); }
},
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qs_dot_6x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_symmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8qa_dot_4x16",
    [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_asymmetric(qp); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_HYBRID,
    "a64_hybrid_s8s32_dot_6x16",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s8_8x12",
    [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>(args, qp); }
),
GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
    GemmMethod::GEMM_INTERLEAVED,
    "a64_gemm_s8_4x4",
    nullptr,
    [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
    [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>(args, qp); }
),
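// Generic fallbacks: quantized_wrapper runs the GEMM at int32
// precision and requantizes the output in a separate pass; it is
// never "recommended" and is only chosen when nothing above applies.
// The DEFAULT entry is the sentinel that terminates the list.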
{
    GemmMethod::QUANTIZE_WRAPPER,
    "quantized_wrapper",
    [](const GemmArgs &args, const Requantize32 &) { return !args._indirect_input; },
    [](const GemmArgs &, const Requantize32 &) { return false; },
    [](const GemmArgs &args, const Requantize32 &qp) { return new QuantizeWrapper<int8_t, int8_t, int32_t>(args, qp); }
},
{
    GemmMethod::DEFAULT,
    "",
    nullptr,
    nullptr,
    nullptr
}
};

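// Expose the table to the generic implementation-selection machinery.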
template<>
const GemmImplementation<int8_t, int8_t, Requantize32> *gemm_implementation_list<int8_t, int8_t, Requantize32>() {
    return gemm_qint8_methods;
}

template UniqueGemmCommon<int8_t, int8_t> gemm<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
template bool has_opt_gemm<int8_t, int8_t, Requantize32>(WeightFormat &weight_format, const GemmArgs &args, const Requantize32 &os);
template std::vector<KernelDescription> get_compatible_kernels<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
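
// Illustrative caller-side sketch (an assumption about typical use,
// not part of this file): with a populated GemmArgs 'args' and a
// Requantize32 'qp', the entry points instantiated above drive
// selection from gemm_qint8_methods:
//
//   // Enumerate every candidate whose support predicate passes:
//   auto kernels = arm_gemm::get_compatible_kernels<int8_t, int8_t, Requantize32>(args, qp);
//
//   // Or instantiate the preferred implementation directly:
//   auto gemm = arm_gemm::gemm<int8_t, int8_t, Requantize32>(args, qp);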

} // namespace arm_gemm

#endif // __aarch64__