xref: /aosp_15_r20/external/ComputeLibrary/arm_compute/runtime/CL/CLTuner.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLTUNER_H
25 #define ARM_COMPUTE_CLTUNER_H
26 
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include "arm_compute/runtime/CL/CLTuningParams.h"
#include "arm_compute/runtime/CL/ICLTuner.h"

#include <functional>
#include <string>
#include <unordered_map>
34 
35 namespace arm_compute
36 {
37 class ICLKernel;
38 
39 /** Basic implementation of the OpenCL tuner interface */
40 class CLTuner : public ICLTuner
41 {
42 public:
43     /** Constructor
44      *
45      * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ?
46      * @param[in] tuning_info      (Optional) opencl parameters to tune
47      *
48      */
49     CLTuner(bool tune_new_kernels = true, CLTuningInfo tuning_info = CLTuningInfo());
50 
51     /** Destructor */
52     ~CLTuner() = default;
53 
54     /** Setter for tune_new_kernels option
55      *
56      * @param[in] tune_new_kernels Find the optimal local workgroup size for kernels which are not present in the table ?
57      */
58     void set_tune_new_kernels(bool tune_new_kernels);
59 
60     /** Tune kernels that are not in the tuning parameters table
61      *
62      * @return True if tuning of new kernels is enabled.
63      */
64     bool tune_new_kernels() const;
65 
66     /** Setter for tune parameters option
67      *
68      * @param[in] tuning_info opencl parameters to tune
69      */
70     void set_tuning_parameters(CLTuningInfo tuning_info);
71 
72     /** Set OpenCL tuner mode
73      *
74      * @param[in] mode Indicates how exhaustive the search for the optimal tuning parameters should be while tuning. Default is Exhaustive mode
75      */
76     void set_tuner_mode(CLTunerMode mode);
77 
78     /** Manually add tuning parameters for a kernel
79      *
80      * @param[in] kernel_id             Unique identifiant of the kernel
81      * @param[in] optimal_tuning_params Optimal tuning parameters to use for the given kernel
82      */
83     void add_tuning_params(const std::string &kernel_id, CLTuningParams optimal_tuning_params);
84 
85     /** Import tuning parameters table
86      *
87      * @param[in] tuning_params_table The unordered_map container to import
88      */
89     void import_tuning_params(const std::unordered_map<std::string, CLTuningParams> &tuning_params_table);
90 
91     /** Give read access to the tuning params table
92      *
93      * @return The tuning params table as unordered_map container
94      */
95     const std::unordered_map<std::string, CLTuningParams> &tuning_params_table() const;
96 
97     /** Set the OpenCL kernel event
98      *
99      * @note The interceptor can use this function to store the event associated to the OpenCL kernel
100      *
101      * @param[in] kernel_event The OpenCL kernel event
102      */
103     void set_cl_kernel_event(cl_event kernel_event);
104 
105     /** clEnqueueNDRangeKernel symbol */
106     std::function<decltype(clEnqueueNDRangeKernel)> real_clEnqueueNDRangeKernel;
107 
108     /** Load the tuning parameters table from file. It also sets up the tuning read from the file
109      *
110      * @param[in] filename Load the tuning parameters table from this file.(Must exist)
111      *
112      */
113     void load_from_file(const std::string &filename);
114 
115     /** Save the content of the tuning parameters table to file
116      *
117      * @param[in] filename Save the tuning parameters table to this file. (Content will be overwritten)
118      *
119      * @return true if the file was created
120      */
121     bool save_to_file(const std::string &filename) const;
122 
123     // Inherited methods overridden:
124     void tune_kernel_static(ICLKernel &kernel) override;
125     void tune_kernel_dynamic(ICLKernel &kernel) override;
126     void tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) override;
127     /** Is the kernel_event set ?
128      *
129      * @return true if the kernel_event is set.
130      */
131     bool kernel_event_is_set() const;
132 
133     /** A wrapper wrapping tensors and other objects needed for running the kernel
134      */
135     struct IKernelData;
136 
137 private:
138     /** Perform tune_kernel_dynamic
139      *
140      * @param[in]     kernel OpenCL kernel to be tuned with tuning parameters
141      * @param[in,out] data   IKernelData object wrapping tensors and other objects needed for running the kernel
142      *
143      */
144     void do_tune_kernel_dynamic(ICLKernel &kernel, IKernelData *data);
145     /** Find optimal tuning parameters using brute-force approach
146      *
147      * @param[in]     kernel OpenCL kernel to be tuned with tuning parameters
148      * @param[in,out] data   IKernelData object wrapping tensors and other objects needed for running the kernel
149      *
150      * @return The optimal tuning parameters to use
151      */
152     CLTuningParams find_optimal_tuning_params(ICLKernel &kernel, IKernelData *data);
153 
154     std::unordered_map<std::string, CLTuningParams> _tuning_params_table;
155     std::unordered_map<std::string, cl::NDRange>    _lws_table;
156     cl::Event    _kernel_event;
157     bool         _tune_new_kernels;
158     CLTuningInfo _tuning_info;
159 };
160 } // namespace arm_compute
161 #endif /*ARM_COMPUTE_CLTUNER_H */
162