xref: /aosp_15_r20/external/ComputeLibrary/src/core/NEON/kernels/arm_gemm/gemm_implementation.hpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2018-2020, 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "arm_gemm.hpp"
26 
27 #include "kernel_weight_format.hpp"
28 
29 #include <cstdint>
30 #include <functional>
31 
32 namespace arm_gemm {
33 
34 /* Structure describing an implementation.  For each supported combination
35  * of types, a static list of these structures is built up to describe the
36  * implementations available.
37  */
38 template<typename Top, typename Tret, class OutputStage = Nothing>
39 struct GemmImplementation {
40     const GemmMethod                                                               method;
41     const char *                                                                   name;
42     const KernelWeightFormat                                                       kernel_weight_format = KernelWeightFormat::NON_FIXED;
43     std::function<bool(const GemmArgs &, const OutputStage &)>                     is_supported = {};
44     std::function<uint64_t(const GemmArgs &, const OutputStage &)>                 cycle_estimate = {};
45     std::function<GemmCommon<Top, Tret> *(const GemmArgs &, const OutputStage &)>  instantiate = {};
46 
do_is_supportedarm_gemm::GemmImplementation47     bool do_is_supported(const GemmArgs &args, const OutputStage &os) const {
48 	// Check supplied is_supported() function first.
49         if (is_supported != nullptr && !is_supported(args, os)) {
50             return false;
51         }
52 
53         // Check weight format is appropriate.
54         if (args._fixed_format == false) {
55             // Can't return a fixed format kernel if we weren't asked for one.
56             return (kernel_weight_format == KernelWeightFormat::NON_FIXED);
57         } else {
58             // Fixed format kernel requested: if this is a non-fixed format kernel we can't use it.
59             if (kernel_weight_format == KernelWeightFormat::NON_FIXED) {
60                 return false;
61             }
62 
63             // If there's no config, or the config says ANY then this one is OK.
64             if (!args._cfg || args._cfg->weight_format == WeightFormat::ANY) {
65                 return true;
66             }
67 
68             // If we get here it means there is a config and it specifies a format.  Check it matches this kernel.
69             // NOTE: this will execute SVE instructions if it's an SVE kernel, so it's important that is_supported()
70             // was called above first.
71             return (args._cfg->weight_format == get_weight_format(kernel_weight_format, sizeof(Top)));
72         }
73     }
74 
do_cycle_estimatearm_gemm::GemmImplementation75     uint64_t do_cycle_estimate(const GemmArgs &args, const OutputStage &os) const {
76         if (cycle_estimate != nullptr) {
77             return cycle_estimate(args, os);
78         } else {
79             return 0;
80         }
81     }
82 
do_instantiatearm_gemm::GemmImplementation83     GemmCommon<Top, Tret> *do_instantiate(const GemmArgs &args, const OutputStage &os) const {
84         return instantiate(args, os);
85     }
86 
with_estimatearm_gemm::GemmImplementation87     static GemmImplementation with_estimate(GemmMethod m, const char *n,
88                        std::function<bool(const GemmArgs &, const OutputStage &)> is_supported, std::function<uint64_t(const GemmArgs &, const OutputStage &)> cycle_estimate,
89                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &, const OutputStage &)> instantiate) {
90         GemmImplementation impl(m,n);
91 
92         impl.is_supported=is_supported;
93         impl.cycle_estimate=cycle_estimate;
94         impl.instantiate=instantiate;
95 
96         return impl;
97     }
98 
99     GemmImplementation(const GemmImplementation &) = default;
100     GemmImplementation & operator= (const GemmImplementation &) = default;
101 
GemmImplementationarm_gemm::GemmImplementation102     GemmImplementation(GemmMethod m, const char * n) : method(m), name(n) {}
103 
GemmImplementationarm_gemm::GemmImplementation104     GemmImplementation(GemmMethod m, const char *n,
105                        std::function<bool(const GemmArgs &, const OutputStage &)> is_supported, std::function<bool(const GemmArgs &, const OutputStage &)> is_recommended,
106                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &, const OutputStage &)> instantiate) :
107                        method(m), name(n), is_supported(is_supported),
108                        cycle_estimate( [is_recommended](const GemmArgs &args, const OutputStage &os) { return (is_recommended == nullptr) ? 0 : (is_recommended(args, os) ? 0 : UINT64_MAX); } ),
109                        instantiate(instantiate) {   }
110 
GemmImplementationarm_gemm::GemmImplementation111     GemmImplementation(GemmMethod m, const char *n, KernelWeightFormat kwf,
112                        std::function<bool(const GemmArgs &, const OutputStage &)> is_supported, std::function<bool(const GemmArgs &, const OutputStage &)> is_recommended,
113                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &, const OutputStage &)> instantiate) :
114                        method(m), name(n), kernel_weight_format(kwf), is_supported(is_supported),
115                        cycle_estimate( [is_recommended](const GemmArgs &args, const OutputStage &os) { return (is_recommended == nullptr) ? 0 : (is_recommended(args, os) ? 0 : UINT64_MAX); } ),
116                        instantiate(instantiate) {   }
117 };
118 
119 /* Slightly different version of above for straightforward GEMMs with no
120  * output stage, so the std::functions there don't have to deal with the
121  * unnecessary second argument.  */
122 template<typename Top, typename Tret>
123 struct GemmImplementation<Top, Tret, Nothing> {
124     const GemmMethod                                          method;
125     const char *                                              name;
126     const KernelWeightFormat                                  kernel_weight_format = KernelWeightFormat::NON_FIXED;
127     std::function<bool(const GemmArgs &)>                     is_supported = {};
128     std::function<uint64_t(const GemmArgs &)>                 cycle_estimate = {};
129     std::function<GemmCommon<Top, Tret> *(const GemmArgs &)>  instantiate = {};
130 
do_is_supportedarm_gemm::GemmImplementation131     bool do_is_supported(const GemmArgs &args, const Nothing &) const {
132 	// Check supplied is_supported() function first.
133         if (is_supported != nullptr && !is_supported(args)) {
134             return false;
135         }
136 
137         // Check weight format is appropriate.
138         if (args._fixed_format == false) {
139             // Can't return a fixed format kernel if we weren't asked for one.
140             return (kernel_weight_format == KernelWeightFormat::NON_FIXED);
141         } else {
142             // Fixed format kernel requested: if this is a non-fixed format kernel we can't use it.
143             if (kernel_weight_format == KernelWeightFormat::NON_FIXED) {
144                 return false;
145             }
146 
147             // If there's no config, or the config says ANY then this one is OK.
148             if (!args._cfg || args._cfg->weight_format == WeightFormat::ANY) {
149                 return true;
150             }
151 
152             // If we get here it means there is a config and it specifies a format.  Check it matches this kernel.
153             // NOTE: this will execute SVE instructions if it's an SVE kernel, so it's important that is_supported()
154             // was called above first.
155             return (args._cfg->weight_format == get_weight_format(kernel_weight_format, sizeof(Top)));
156         }
157     }
158 
do_cycle_estimatearm_gemm::GemmImplementation159     uint64_t do_cycle_estimate(const GemmArgs &args, const Nothing &) const {
160         if (cycle_estimate != nullptr) {
161             return cycle_estimate(args);
162         } else {
163             return 0;
164         }
165     }
166 
do_instantiatearm_gemm::GemmImplementation167     GemmCommon<Top, Tret> *do_instantiate(const GemmArgs &args, const Nothing &) const {
168         return instantiate(args);
169     }
170 
with_estimatearm_gemm::GemmImplementation171     static GemmImplementation with_estimate(GemmMethod m, const char *n,
172                        std::function<bool(const GemmArgs &)> is_supported, std::function<uint64_t(const GemmArgs &)> cycle_estimate,
173                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &)> instantiate) {
174         GemmImplementation impl(m,n);
175 
176         impl.is_supported=is_supported;
177         impl.cycle_estimate=cycle_estimate;
178         impl.instantiate=instantiate;
179 
180         return impl;
181     }
182 
with_estimatearm_gemm::GemmImplementation183     static GemmImplementation with_estimate(GemmMethod m, const char *n, KernelWeightFormat f,
184                        std::function<bool(const GemmArgs &)> is_supported, std::function<uint64_t(const GemmArgs &)> cycle_estimate,
185                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &)> instantiate) {
186         GemmImplementation impl(m,n,f);
187 
188         impl.is_supported=is_supported;
189         impl.cycle_estimate=cycle_estimate;
190         impl.instantiate=instantiate;
191 
192         return impl;
193     }
194 
195     GemmImplementation(const GemmImplementation &) = default;
196     GemmImplementation & operator= (const GemmImplementation &) = default;
197 
GemmImplementationarm_gemm::GemmImplementation198     GemmImplementation(GemmMethod m, const char *n, KernelWeightFormat f=KernelWeightFormat::NON_FIXED) : method(m), name(n), kernel_weight_format(f) {}
199 
GemmImplementationarm_gemm::GemmImplementation200     GemmImplementation(GemmMethod m, const char *n,
201                        std::function<bool(const GemmArgs &)> is_supported, std::function<bool(const GemmArgs &)> is_recommended,
202                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &)> instantiate) :
203                        method(m), name(n), is_supported(is_supported),
204                        cycle_estimate( [is_recommended](const GemmArgs &args) -> uint64_t { return (is_recommended == nullptr) ? 0 : (is_recommended(args) ? 0 : UINT64_MAX); } ),
205                        instantiate(instantiate) {   }
206 
GemmImplementationarm_gemm::GemmImplementation207     GemmImplementation(GemmMethod m, const char *n, KernelWeightFormat kwf,
208                        std::function<bool(const GemmArgs &)> is_supported, std::function<bool(const GemmArgs &)> is_recommended,
209                        std::function<GemmCommon<Top, Tret> *(const GemmArgs &)> instantiate) :
210                        method(m), name(n), kernel_weight_format(kwf), is_supported(is_supported),
211                        cycle_estimate( [is_recommended](const GemmArgs &args) -> uint64_t { return (is_recommended == nullptr) ? 0 : (is_recommended(args) ? 0 : UINT64_MAX); } ),
212                        instantiate(instantiate) {   }
213 };
214 
215 /* "Main" function implemented for each valid combination of types.
216  * Returns a list of GEMM implementation descriptors for processing by the
217  * other functions, ended by an implementation with
218  * method==GemmMethod::DEFAULT.  */
219 template<typename Top, typename Tret, class OutputStage = Nothing>
220 const GemmImplementation<Top, Tret, OutputStage> *gemm_implementation_list();
221 
222 /*
223  * Select a GEMM implementation for the given arguments.
224  *
225  * The logic here returns the method on the list which supports the
226  * requested problem parameters, matches the provided filters (method and/or
227  * name string match) and offers the lowest cycle estimate.  A cycle
228  * estimate of '0' is treated as a special value, causing the corresponding
229  * method to be selected immediately.
230  *
231  * If no method supports the requested parameters and passes the filters,
232  * this function returns false and doesn't touch the provided pointer
233  * reference.
234  */
235 template<typename Top, typename Tret, class OutputStage>
find_implementation(const GemmArgs & args,const OutputStage & os,const GemmImplementation<Top,Tret,OutputStage> * & impl)236 bool find_implementation(const GemmArgs &args, const OutputStage &os, const GemmImplementation<Top, Tret, OutputStage> * &impl) {
237     auto gemms = gemm_implementation_list<Top, Tret, OutputStage>();
238     const GemmConfig *cfg = args._cfg;
239 
240     const GemmImplementation<Top, Tret, OutputStage> *saved_impl = nullptr;
241     uint64_t best_estimate = 0;
242 
243     for (const GemmImplementation<Top, Tret, OutputStage> *i = gemms; i->method != GemmMethod::DEFAULT; i++) {
244         /* Skip if this implementation doesn't support these args. */
245         if (!i->do_is_supported(args, os)) {
246             continue;
247         }
248 
249         /* Skip if a specific method is requested and this is a different one. */
250         if (cfg && cfg->method != GemmMethod::DEFAULT && i->method != cfg->method) {
251             continue;
252         }
253 
254         /* Skip if a filter is to be applied and it doesn't match. */
255         if (cfg && cfg->filter != "" && !strstr(i->name, cfg->filter.c_str())) {
256             continue;
257         }
258 
259         /* Test the cycle estimate */
260         uint64_t estimate = i->do_cycle_estimate(args, os);
261 
262         /* Short circuit - if the estimate is zero, return this one immediately. */
263         if (estimate==0) {
264             impl=i;
265             return true;
266         }
267 
268         /* Otherwise, remember this is our best so far if we don't yet have
269          * a valid candidate, or we beat the estimate.  */
270         if ((saved_impl == nullptr) || (estimate < best_estimate)) {
271             saved_impl = i;
272             best_estimate = estimate;
273         }
274     }
275 
276     /* Return whichever method gave the best estimate. */
277     if (saved_impl != nullptr) {
278         impl = saved_impl;
279         return true;
280     }
281 
282     return false;
283 }
284 
285 template<typename Top, typename Tret, class OutputStage>
get_compatible_kernels(const GemmArgs & args,const OutputStage & os)286 std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage &os) {
287     std::vector<KernelDescription> res;
288 
289     /* Find out what the default implementation in so we can set the flag accordingly later. */
290     const GemmImplementation<Top, Tret, OutputStage> *default_impl;
291     find_implementation(args, os, default_impl);
292 
293     auto gemms = gemm_implementation_list<Top, Tret, OutputStage>();
294 
295     for (const GemmImplementation<Top, Tret, OutputStage> *i = gemms; i->method != GemmMethod::DEFAULT; i++) {
296         /* Check that this implementation supports the presented problem. */
297 
298         if (!i->do_is_supported(args, os)) {
299             continue;
300         }
301 
302         res.push_back(KernelDescription(i->method, i->name, i==default_impl, i->do_cycle_estimate(args, os)));
303     }
304 
305     return res;
306 }
307 
308 template<typename Top, typename Tret, class OutputStage>
has_opt_gemm(WeightFormat & wf,const GemmArgs & args,const OutputStage & os)309 bool has_opt_gemm(WeightFormat &wf, const GemmArgs &args, const OutputStage &os) {
310     const GemmImplementation<Top, Tret, OutputStage> *impl;
311     const bool success =  find_implementation<Top, Tret, OutputStage>(args, os, impl);
312     if (success)
313       wf = UniqueGemmCommon<Top, Tret>(impl->do_instantiate(args, os))->get_config().weight_format;
314     return success;
315 }
316 
317 template<typename Top, typename Tret, class OutputStage>
gemm(const GemmArgs & args,const OutputStage & os)318 UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage &os) {
319     const GemmImplementation<Top, Tret, OutputStage> *impl;
320 
321     if (find_implementation<Top, Tret, OutputStage>(args, os, impl)) {
322         return UniqueGemmCommon<Top, Tret>(impl->do_instantiate(args, os));
323     }
324 
325     return UniqueGemmCommon<Top, Tret>(nullptr);
326 }
327 
328 template<typename Top, typename Tret, class OutputStage>
get_gemm_method(const GemmArgs & args,const OutputStage & os)329 KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage &os) {
330     const GemmImplementation<Top, Tret, OutputStage> *impl;
331 
332     if (find_implementation<Top, Tret>(args, os, impl)) {
333         return KernelDescription(impl->method, impl->name);
334     }
335 
336     /* This shouldn't happen - there should always be at least one valid implementation. */
337     return KernelDescription();
338 }
339 
340 
341 } // namespace arm_gemm
342