1 /*
2 * Copyright (c) 2022-2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "ClTemplateWriter.h"
25
26 #include "arm_compute/core/CL/CLKernelLibrary.h"
27 #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
28 #include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
29
30 namespace arm_compute
31 {
32 namespace experimental
33 {
34 namespace dynamic_fusion
35 {
36 /// @note: some tags can be unused since they could be used only for the macros, or only for the component code
replace_tags(const std::string & code_template,const TagLUT & tags)37 std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
38 {
39 std::string replaced_code = "";
40 bool scanning_pattern = false;
41 std::string pattern_found = "";
42 for(size_t i = 0; i < code_template.size() - 1; ++i)
43 {
44 if(!scanning_pattern)
45 {
46 if(code_template[i] == '{' && code_template[i + 1] == '{')
47 {
48 i += 1;
49 scanning_pattern = true;
50 pattern_found = "";
51 }
52 else
53 {
54 replaced_code += code_template[i];
55 }
56 }
57 else
58 {
59 if(code_template[i] == '}' && code_template[i + 1] == '}')
60 {
61 i += 1;
62 scanning_pattern = false;
63 std::string err = "Pattern " + pattern_found + " not found in tags";
64 ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
65 replaced_code += tags.find(pattern_found)->second.value;
66 }
67 else
68 {
69 pattern_found += code_template[i];
70 }
71 }
72 }
73
74 return replaced_code;
75 }
~ClTemplateWriter()76 ClTemplateWriter::~ClTemplateWriter()
77 {
78 }
ClTemplateWriter(const GpuKernelComponentGroup & components)79 ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
80 : _components{ components }
81 {
82 }
get_name()83 std::string ClTemplateWriter::get_name()
84 {
85 return write_kernel_name();
86 }
get_code()87 std::string ClTemplateWriter::get_code()
88 {
89 return write_code();
90 }
get_config_id()91 std::string ClTemplateWriter::get_config_id()
92 {
93 std::string config_id = get_name();
94 for(const auto &comp : _components)
95 {
96 config_id += "--" + comp->template_writer()->get_config_id() + "--";
97 }
98
99 return config_id;
100 }
101
get_build_options()102 CLBuildOptions ClTemplateWriter::get_build_options()
103 {
104 CLBuildOptions build_opts{};
105
106 for(const auto &comp : _components)
107 {
108 build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
109 }
110
111 return build_opts;
112 }
113
get_window() const114 Window ClTemplateWriter::get_window() const
115 {
116 const auto root_comp = _components.get_root_component();
117 ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
118 return root_comp->template_writer()->get_window();
119 }
120
get_tensors()121 std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
122 {
123 // Assemble GpuKernelArguments
124 std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
125 for(const auto t : _components.get_argument_tensors())
126 {
127 tensors.emplace(
128 t->id(),
129 GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
130 }
131 return tensors;
132 }
133
write_code()134 std::string ClTemplateWriter::write_code()
135 {
136 ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
137
138 // These data structures will hold the data from all the components in the blueprint
139 std::set<std::string> headers_list{};
140 std::set<std::string> additional_macros{};
141 std::vector<std::string> component_codes{}; // vector because order matters
142
143 // Pass 1: Declare all kernel variables
144 for(auto &component : _components)
145 {
146 component->template_writer()->declare_variables(_vtable, _components);
147 }
148 // Pass 2: Generate component codes
149 for(auto &component : _components)
150 {
151 const auto component_writer = component->template_writer();
152 auto curr_headers_list = component_writer->get_headers_list();
153 auto curr_additional_macros = component_writer->get_additional_macros();
154 auto curr_component_code = component_writer->get_component_code(_components);
155 const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
156 component_codes.push_back(replace_tags(curr_component_code, var_lut));
157
158 headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
159 if(!additional_macros.empty()) // Some components might not have any
160 {
161 additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
162 }
163 }
164
165 // Step 3: Assemble the data gathered by traversing the graph into the string "code"
166 std::string code = "";
167
168 for(auto &header : headers_list)
169 {
170 #if defined(EMBEDDED_KERNELS)
171 code += CLKernelLibrary::get().get_program(header).first;
172 #else // defined(EMBEDDED_KERNELS)
173 code += "#include \"" + header + "\"\n";
174 #endif // defined(EMBEDDED_KERNELS)
175 }
176
177 for(auto ¯os : additional_macros)
178 {
179 code += macros;
180 }
181
182 auto arguments = _components.get_argument_tensors();
183 std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
184 {
185 return l->id() < r->id();
186 });
187 code += write_kernel_signature(_vtable.get_variable_list(arguments));
188
189 code += "\n{\n\n";
190
191 code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
192 code += write_global_section();
193 code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
194
195 {
196 const auto tiles = _components.get_tiles();
197 std::stringstream tiles_ss;
198
199 tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
200
201 for(auto tile : tiles)
202 {
203 const auto var = _vtable.get_variable(tile);
204 const auto data_type = get_cl_type_from_data_type(tile->data_type());
205 const auto var_name = var.uniq_name;
206
207 tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
208 }
209
210 tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n";
211
212 code += tiles_ss.str();
213 }
214
215 for(const auto &component_code : component_codes)
216 {
217 code += component_code;
218 code += "\n";
219 }
220
221 code += "}\n";
222
223 return code;
224 }
write_global_section() const225 std::string ClTemplateWriter::write_global_section() const
226 {
227 const auto dst_info = _components.get_any_dst_tensor();
228 const auto dst_w = dst_info->dimension(0);
229 const auto tile_w = std::max(1, get_window().x().step());
230 const auto tile_h = std::max(1, get_window().y().step());
231 auto leftover_w = dst_w % tile_w;
232
233 std::string code = "";
234 code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
235 code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
236 code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
237
238 code += " const bool g_cond_x = (g_ind_0 == 0);\n";
239 code += " const bool g_cond_y = (g_ind_1 == 0);\n";
240
241 return code;
242 }
write_argument_declaration(const GpuKernelVariableTable::TensorVariable & var) const243 std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
244 {
245 std::string code;
246 switch(var.kernel_argument_info.type)
247 {
248 case GpuKernelArgumentInfo::Type::Vector:
249 {
250 code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
251 break;
252 }
253 case GpuKernelArgumentInfo::Type::Image:
254 {
255 code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
256 break;
257 }
258 case GpuKernelArgumentInfo::Type::Image_3D:
259 {
260 code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
261 code += "\n unsigned int " + var.uniq_name + "_stride_z";
262 break;
263 }
264 case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
265 {
266 code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
267 code += "\n unsigned int " + var.uniq_name + "_stride_z";
268 break;
269 }
270 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
271 {
272 code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
273 break;
274 }
275 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
276 {
277 code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
278 break;
279 }
280 case GpuKernelArgumentInfo::Type::Tensor_3D:
281 {
282 code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")";
283 break;
284 }
285 default:
286 {
287 ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
288 }
289 }
290 return code;
291 }
write_kernel_signature(const GpuKernelVariableTable::VariableList & argument_list) const292 std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
293 {
294 std::string code = "\n__kernel void " + write_kernel_name() + "(";
295
296 for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
297 {
298 code += write_argument_declaration(argument_list[i]) + ",";
299 }
300 if(static_cast<int>(argument_list.size()) - 1 >= 0)
301 {
302 code += write_argument_declaration(argument_list[argument_list.size() - 1]);
303 }
304
305 code += ')';
306
307 return code;
308 }
write_kernel_name() const309 std::string ClTemplateWriter::write_kernel_name() const
310 {
311 if(_components.empty())
312 {
313 return "empty_kernel";
314 }
315 std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
316 for(size_t i = 1; i < _components.size(); ++i)
317 {
318 name += "___";
319 name += _components[i]->template_writer()->get_name();
320 }
321
322 return name;
323 }
324 } // namespace dynamic_fusion
325 } // namespace experimental
326 } // namespace arm_compute
327