1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #ifndef SUBHELPERS_H
17*6467f958SSadaf Ebrahimi #define SUBHELPERS_H
18*6467f958SSadaf Ebrahimi
19*6467f958SSadaf Ebrahimi #include "testHarness.h"
20*6467f958SSadaf Ebrahimi #include "kernelHelpers.h"
21*6467f958SSadaf Ebrahimi #include "typeWrappers.h"
22*6467f958SSadaf Ebrahimi #include "imageHelpers.h"
23*6467f958SSadaf Ebrahimi
24*6467f958SSadaf Ebrahimi #include <limits>
25*6467f958SSadaf Ebrahimi #include <vector>
26*6467f958SSadaf Ebrahimi #include <type_traits>
27*6467f958SSadaf Ebrahimi #include <bitset>
28*6467f958SSadaf Ebrahimi #include <regex>
29*6467f958SSadaf Ebrahimi #include <map>
30*6467f958SSadaf Ebrahimi
31*6467f958SSadaf Ebrahimi #define NR_OF_ACTIVE_WORK_ITEMS 4
32*6467f958SSadaf Ebrahimi
33*6467f958SSadaf Ebrahimi extern MTdata gMTdata;
34*6467f958SSadaf Ebrahimi typedef std::bitset<128> bs128;
35*6467f958SSadaf Ebrahimi extern cl_half_rounding_mode g_rounding_mode;
36*6467f958SSadaf Ebrahimi
37*6467f958SSadaf Ebrahimi bs128 cl_uint4_to_bs128(cl_uint4 v);
38*6467f958SSadaf Ebrahimi cl_uint4 bs128_to_cl_uint4(bs128 v);
39*6467f958SSadaf Ebrahimi cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
40*6467f958SSadaf Ebrahimi const std::string &mask_type,
41*6467f958SSadaf Ebrahimi cl_uint max_sub_group_size);
42*6467f958SSadaf Ebrahimi
43*6467f958SSadaf Ebrahimi // limit possible input values to avoid arithmetic rounding/overflow issues.
44*6467f958SSadaf Ebrahimi // for each subgroup values defined different values
45*6467f958SSadaf Ebrahimi // for rest of workitems set 1 shuffle values
46*6467f958SSadaf Ebrahimi void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
47*6467f958SSadaf Ebrahimi size_t sb_size);
48*6467f958SSadaf Ebrahimi
49*6467f958SSadaf Ebrahimi struct WorkGroupParams
50*6467f958SSadaf Ebrahimi {
51*6467f958SSadaf Ebrahimi
52*6467f958SSadaf Ebrahimi WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1)
global_workgroup_sizeWorkGroupParams53*6467f958SSadaf Ebrahimi : global_workgroup_size(gws), local_workgroup_size(lws),
54*6467f958SSadaf Ebrahimi divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg)
55*6467f958SSadaf Ebrahimi {
56*6467f958SSadaf Ebrahimi subgroup_size = 0;
57*6467f958SSadaf Ebrahimi cluster_size = 0;
58*6467f958SSadaf Ebrahimi work_items_mask = 0;
59*6467f958SSadaf Ebrahimi use_core_subgroups = true;
60*6467f958SSadaf Ebrahimi dynsc = 0;
61*6467f958SSadaf Ebrahimi load_masks();
62*6467f958SSadaf Ebrahimi }
63*6467f958SSadaf Ebrahimi size_t global_workgroup_size;
64*6467f958SSadaf Ebrahimi size_t local_workgroup_size;
65*6467f958SSadaf Ebrahimi size_t subgroup_size;
66*6467f958SSadaf Ebrahimi cl_uint cluster_size;
67*6467f958SSadaf Ebrahimi bs128 work_items_mask;
68*6467f958SSadaf Ebrahimi size_t dynsc;
69*6467f958SSadaf Ebrahimi bool use_core_subgroups;
70*6467f958SSadaf Ebrahimi std::vector<bs128> all_work_item_masks;
71*6467f958SSadaf Ebrahimi int divergence_mask_arg;
72*6467f958SSadaf Ebrahimi int cluster_size_arg;
73*6467f958SSadaf Ebrahimi void save_kernel_source(const std::string &source, std::string name = "")
74*6467f958SSadaf Ebrahimi {
75*6467f958SSadaf Ebrahimi if (name == "")
76*6467f958SSadaf Ebrahimi {
77*6467f958SSadaf Ebrahimi name = "default";
78*6467f958SSadaf Ebrahimi }
79*6467f958SSadaf Ebrahimi if (kernel_function_name.find(name) != kernel_function_name.end())
80*6467f958SSadaf Ebrahimi {
81*6467f958SSadaf Ebrahimi log_info("Kernel definition duplication. Source will be "
82*6467f958SSadaf Ebrahimi "overwritten for function name %s\n",
83*6467f958SSadaf Ebrahimi name.c_str());
84*6467f958SSadaf Ebrahimi }
85*6467f958SSadaf Ebrahimi kernel_function_name[name] = source;
86*6467f958SSadaf Ebrahimi };
87*6467f958SSadaf Ebrahimi // return specific defined kernel or default.
get_kernel_sourceWorkGroupParams88*6467f958SSadaf Ebrahimi std::string get_kernel_source(std::string name)
89*6467f958SSadaf Ebrahimi {
90*6467f958SSadaf Ebrahimi if (kernel_function_name.find(name) == kernel_function_name.end())
91*6467f958SSadaf Ebrahimi {
92*6467f958SSadaf Ebrahimi return kernel_function_name["default"];
93*6467f958SSadaf Ebrahimi }
94*6467f958SSadaf Ebrahimi return kernel_function_name[name];
95*6467f958SSadaf Ebrahimi }
96*6467f958SSadaf Ebrahimi
97*6467f958SSadaf Ebrahimi
98*6467f958SSadaf Ebrahimi private:
99*6467f958SSadaf Ebrahimi std::map<std::string, std::string> kernel_function_name;
load_masksWorkGroupParams100*6467f958SSadaf Ebrahimi void load_masks()
101*6467f958SSadaf Ebrahimi {
102*6467f958SSadaf Ebrahimi if (divergence_mask_arg != -1)
103*6467f958SSadaf Ebrahimi {
104*6467f958SSadaf Ebrahimi // 1 in string will be set 1, 0 will be set 0
105*6467f958SSadaf Ebrahimi bs128 mask_0xf0f0f0f0("11110000111100001111000011110000"
106*6467f958SSadaf Ebrahimi "11110000111100001111000011110000"
107*6467f958SSadaf Ebrahimi "11110000111100001111000011110000"
108*6467f958SSadaf Ebrahimi "11110000111100001111000011110000",
109*6467f958SSadaf Ebrahimi 128, '0', '1');
110*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0xf0f0f0f0);
111*6467f958SSadaf Ebrahimi // 1 in string will be set 0, 0 will be set 1
112*6467f958SSadaf Ebrahimi bs128 mask_0x0f0f0f0f("11110000111100001111000011110000"
113*6467f958SSadaf Ebrahimi "11110000111100001111000011110000"
114*6467f958SSadaf Ebrahimi "11110000111100001111000011110000"
115*6467f958SSadaf Ebrahimi "11110000111100001111000011110000",
116*6467f958SSadaf Ebrahimi 128, '1', '0');
117*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x0f0f0f0f);
118*6467f958SSadaf Ebrahimi bs128 mask_0x5555aaaa("10101010101010101010101010101010"
119*6467f958SSadaf Ebrahimi "10101010101010101010101010101010"
120*6467f958SSadaf Ebrahimi "10101010101010101010101010101010"
121*6467f958SSadaf Ebrahimi "10101010101010101010101010101010",
122*6467f958SSadaf Ebrahimi 128, '0', '1');
123*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x5555aaaa);
124*6467f958SSadaf Ebrahimi bs128 mask_0xaaaa5555("10101010101010101010101010101010"
125*6467f958SSadaf Ebrahimi "10101010101010101010101010101010"
126*6467f958SSadaf Ebrahimi "10101010101010101010101010101010"
127*6467f958SSadaf Ebrahimi "10101010101010101010101010101010",
128*6467f958SSadaf Ebrahimi 128, '1', '0');
129*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0xaaaa5555);
130*6467f958SSadaf Ebrahimi // 0x0f0ff0f0
131*6467f958SSadaf Ebrahimi bs128 mask_0x0f0ff0f0("00001111000011111111000011110000"
132*6467f958SSadaf Ebrahimi "00001111000011111111000011110000"
133*6467f958SSadaf Ebrahimi "00001111000011111111000011110000"
134*6467f958SSadaf Ebrahimi "00001111000011111111000011110000",
135*6467f958SSadaf Ebrahimi 128, '0', '1');
136*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x0f0ff0f0);
137*6467f958SSadaf Ebrahimi // 0xff0000ff
138*6467f958SSadaf Ebrahimi bs128 mask_0xff0000ff("11111111000000000000000011111111"
139*6467f958SSadaf Ebrahimi "11111111000000000000000011111111"
140*6467f958SSadaf Ebrahimi "11111111000000000000000011111111"
141*6467f958SSadaf Ebrahimi "11111111000000000000000011111111",
142*6467f958SSadaf Ebrahimi 128, '0', '1');
143*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0xff0000ff);
144*6467f958SSadaf Ebrahimi // 0xff00ff00
145*6467f958SSadaf Ebrahimi bs128 mask_0xff00ff00("11111111000000001111111100000000"
146*6467f958SSadaf Ebrahimi "11111111000000001111111100000000"
147*6467f958SSadaf Ebrahimi "11111111000000001111111100000000"
148*6467f958SSadaf Ebrahimi "11111111000000001111111100000000",
149*6467f958SSadaf Ebrahimi 128, '0', '1');
150*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0xff00ff00);
151*6467f958SSadaf Ebrahimi // 0x00ffff00
152*6467f958SSadaf Ebrahimi bs128 mask_0x00ffff00("00000000111111111111111100000000"
153*6467f958SSadaf Ebrahimi "00000000111111111111111100000000"
154*6467f958SSadaf Ebrahimi "00000000111111111111111100000000"
155*6467f958SSadaf Ebrahimi "00000000111111111111111100000000",
156*6467f958SSadaf Ebrahimi 128, '0', '1');
157*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x00ffff00);
158*6467f958SSadaf Ebrahimi // 0x80 1 workitem highest id for 8 subgroup size
159*6467f958SSadaf Ebrahimi bs128 mask_0x80808080("10000000100000001000000010000000"
160*6467f958SSadaf Ebrahimi "10000000100000001000000010000000"
161*6467f958SSadaf Ebrahimi "10000000100000001000000010000000"
162*6467f958SSadaf Ebrahimi "10000000100000001000000010000000",
163*6467f958SSadaf Ebrahimi 128, '0', '1');
164*6467f958SSadaf Ebrahimi
165*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x80808080);
166*6467f958SSadaf Ebrahimi // 0x8000 1 workitem highest id for 16 subgroup size
167*6467f958SSadaf Ebrahimi bs128 mask_0x80008000("10000000000000001000000000000000"
168*6467f958SSadaf Ebrahimi "10000000000000001000000000000000"
169*6467f958SSadaf Ebrahimi "10000000000000001000000000000000"
170*6467f958SSadaf Ebrahimi "10000000000000001000000000000000",
171*6467f958SSadaf Ebrahimi 128, '0', '1');
172*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x80008000);
173*6467f958SSadaf Ebrahimi // 0x80000000 1 workitem highest id for 32 subgroup size
174*6467f958SSadaf Ebrahimi bs128 mask_0x80000000("10000000000000000000000000000000"
175*6467f958SSadaf Ebrahimi "10000000000000000000000000000000"
176*6467f958SSadaf Ebrahimi "10000000000000000000000000000000"
177*6467f958SSadaf Ebrahimi "10000000000000000000000000000000",
178*6467f958SSadaf Ebrahimi 128, '0', '1');
179*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x80000000);
180*6467f958SSadaf Ebrahimi // 0x80000000 00000000 1 workitem highest id for 64 subgroup size
181*6467f958SSadaf Ebrahimi // 0x80000000 1 workitem highest id for 32 subgroup size
182*6467f958SSadaf Ebrahimi bs128 mask_0x8000000000000000("10000000000000000000000000000000"
183*6467f958SSadaf Ebrahimi "00000000000000000000000000000000"
184*6467f958SSadaf Ebrahimi "10000000000000000000000000000000"
185*6467f958SSadaf Ebrahimi "00000000000000000000000000000000",
186*6467f958SSadaf Ebrahimi 128, '0', '1');
187*6467f958SSadaf Ebrahimi
188*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0x8000000000000000);
189*6467f958SSadaf Ebrahimi // 0x80000000 00000000 00000000 00000000 1 workitem highest id for
190*6467f958SSadaf Ebrahimi // 128 subgroup size
191*6467f958SSadaf Ebrahimi bs128 mask_0x80000000000000000000000000000000(
192*6467f958SSadaf Ebrahimi "10000000000000000000000000000000"
193*6467f958SSadaf Ebrahimi "00000000000000000000000000000000"
194*6467f958SSadaf Ebrahimi "00000000000000000000000000000000"
195*6467f958SSadaf Ebrahimi "00000000000000000000000000000000",
196*6467f958SSadaf Ebrahimi 128, '0', '1');
197*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(
198*6467f958SSadaf Ebrahimi mask_0x80000000000000000000000000000000);
199*6467f958SSadaf Ebrahimi
200*6467f958SSadaf Ebrahimi bs128 mask_0xffffffff("11111111111111111111111111111111"
201*6467f958SSadaf Ebrahimi "11111111111111111111111111111111"
202*6467f958SSadaf Ebrahimi "11111111111111111111111111111111"
203*6467f958SSadaf Ebrahimi "11111111111111111111111111111111",
204*6467f958SSadaf Ebrahimi 128, '0', '1');
205*6467f958SSadaf Ebrahimi all_work_item_masks.push_back(mask_0xffffffff);
206*6467f958SSadaf Ebrahimi }
207*6467f958SSadaf Ebrahimi }
208*6467f958SSadaf Ebrahimi };
209*6467f958SSadaf Ebrahimi
210*6467f958SSadaf Ebrahimi enum class SubgroupsBroadcastOp
211*6467f958SSadaf Ebrahimi {
212*6467f958SSadaf Ebrahimi broadcast,
213*6467f958SSadaf Ebrahimi broadcast_first,
214*6467f958SSadaf Ebrahimi non_uniform_broadcast
215*6467f958SSadaf Ebrahimi };
216*6467f958SSadaf Ebrahimi
217*6467f958SSadaf Ebrahimi enum class NonUniformVoteOp
218*6467f958SSadaf Ebrahimi {
219*6467f958SSadaf Ebrahimi elect,
220*6467f958SSadaf Ebrahimi all,
221*6467f958SSadaf Ebrahimi any,
222*6467f958SSadaf Ebrahimi all_equal
223*6467f958SSadaf Ebrahimi };
224*6467f958SSadaf Ebrahimi
225*6467f958SSadaf Ebrahimi enum class BallotOp
226*6467f958SSadaf Ebrahimi {
227*6467f958SSadaf Ebrahimi ballot,
228*6467f958SSadaf Ebrahimi inverse_ballot,
229*6467f958SSadaf Ebrahimi ballot_bit_extract,
230*6467f958SSadaf Ebrahimi ballot_bit_count,
231*6467f958SSadaf Ebrahimi ballot_inclusive_scan,
232*6467f958SSadaf Ebrahimi ballot_exclusive_scan,
233*6467f958SSadaf Ebrahimi ballot_find_lsb,
234*6467f958SSadaf Ebrahimi ballot_find_msb,
235*6467f958SSadaf Ebrahimi eq_mask,
236*6467f958SSadaf Ebrahimi ge_mask,
237*6467f958SSadaf Ebrahimi gt_mask,
238*6467f958SSadaf Ebrahimi le_mask,
239*6467f958SSadaf Ebrahimi lt_mask,
240*6467f958SSadaf Ebrahimi };
241*6467f958SSadaf Ebrahimi
242*6467f958SSadaf Ebrahimi enum class ShuffleOp
243*6467f958SSadaf Ebrahimi {
244*6467f958SSadaf Ebrahimi shuffle,
245*6467f958SSadaf Ebrahimi shuffle_up,
246*6467f958SSadaf Ebrahimi shuffle_down,
247*6467f958SSadaf Ebrahimi shuffle_xor,
248*6467f958SSadaf Ebrahimi rotate,
249*6467f958SSadaf Ebrahimi clustered_rotate,
250*6467f958SSadaf Ebrahimi };
251*6467f958SSadaf Ebrahimi
252*6467f958SSadaf Ebrahimi enum class ArithmeticOp
253*6467f958SSadaf Ebrahimi {
254*6467f958SSadaf Ebrahimi add_,
255*6467f958SSadaf Ebrahimi max_,
256*6467f958SSadaf Ebrahimi min_,
257*6467f958SSadaf Ebrahimi mul_,
258*6467f958SSadaf Ebrahimi and_,
259*6467f958SSadaf Ebrahimi or_,
260*6467f958SSadaf Ebrahimi xor_,
261*6467f958SSadaf Ebrahimi logical_and,
262*6467f958SSadaf Ebrahimi logical_or,
263*6467f958SSadaf Ebrahimi logical_xor
264*6467f958SSadaf Ebrahimi };
265*6467f958SSadaf Ebrahimi
266*6467f958SSadaf Ebrahimi const char *const operation_names(ArithmeticOp operation);
267*6467f958SSadaf Ebrahimi const char *const operation_names(BallotOp operation);
268*6467f958SSadaf Ebrahimi const char *const operation_names(ShuffleOp operation);
269*6467f958SSadaf Ebrahimi const char *const operation_names(NonUniformVoteOp operation);
270*6467f958SSadaf Ebrahimi const char *const operation_names(SubgroupsBroadcastOp operation);
271*6467f958SSadaf Ebrahimi
272*6467f958SSadaf Ebrahimi class subgroupsAPI {
273*6467f958SSadaf Ebrahimi public:
subgroupsAPI(cl_platform_id platform,bool use_core_subgroups)274*6467f958SSadaf Ebrahimi subgroupsAPI(cl_platform_id platform, bool use_core_subgroups)
275*6467f958SSadaf Ebrahimi {
276*6467f958SSadaf Ebrahimi static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
277*6467f958SSadaf Ebrahimi == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
278*6467f958SSadaf Ebrahimi "Enums have to be the same");
279*6467f958SSadaf Ebrahimi static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
280*6467f958SSadaf Ebrahimi == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
281*6467f958SSadaf Ebrahimi "Enums have to be the same");
282*6467f958SSadaf Ebrahimi if (use_core_subgroups)
283*6467f958SSadaf Ebrahimi {
284*6467f958SSadaf Ebrahimi _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
285*6467f958SSadaf Ebrahimi clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
286*6467f958SSadaf Ebrahimi }
287*6467f958SSadaf Ebrahimi else
288*6467f958SSadaf Ebrahimi {
289*6467f958SSadaf Ebrahimi _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn)
290*6467f958SSadaf Ebrahimi clGetExtensionFunctionAddressForPlatform(
291*6467f958SSadaf Ebrahimi platform, "clGetKernelSubGroupInfoKHR");
292*6467f958SSadaf Ebrahimi clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR";
293*6467f958SSadaf Ebrahimi }
294*6467f958SSadaf Ebrahimi }
clGetKernelSubGroupInfo_ptr()295*6467f958SSadaf Ebrahimi clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr()
296*6467f958SSadaf Ebrahimi {
297*6467f958SSadaf Ebrahimi return _clGetKernelSubGroupInfo_ptr;
298*6467f958SSadaf Ebrahimi }
299*6467f958SSadaf Ebrahimi const char *clGetKernelSubGroupInfo_name;
300*6467f958SSadaf Ebrahimi
301*6467f958SSadaf Ebrahimi private:
302*6467f958SSadaf Ebrahimi clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
303*6467f958SSadaf Ebrahimi };
304*6467f958SSadaf Ebrahimi
305*6467f958SSadaf Ebrahimi // Need to defined custom type for vector size = 3 and half type. This is
306*6467f958SSadaf Ebrahimi // because of 3-component types are otherwise indistinguishable from the
307*6467f958SSadaf Ebrahimi // 4-component types, and because the half type is indistinguishable from some
308*6467f958SSadaf Ebrahimi // other 16-bit type (ushort)
309*6467f958SSadaf Ebrahimi namespace subgroups {
310*6467f958SSadaf Ebrahimi struct cl_char3
311*6467f958SSadaf Ebrahimi {
312*6467f958SSadaf Ebrahimi ::cl_char3 data;
313*6467f958SSadaf Ebrahimi };
314*6467f958SSadaf Ebrahimi struct cl_uchar3
315*6467f958SSadaf Ebrahimi {
316*6467f958SSadaf Ebrahimi ::cl_uchar3 data;
317*6467f958SSadaf Ebrahimi };
318*6467f958SSadaf Ebrahimi struct cl_short3
319*6467f958SSadaf Ebrahimi {
320*6467f958SSadaf Ebrahimi ::cl_short3 data;
321*6467f958SSadaf Ebrahimi };
322*6467f958SSadaf Ebrahimi struct cl_ushort3
323*6467f958SSadaf Ebrahimi {
324*6467f958SSadaf Ebrahimi ::cl_ushort3 data;
325*6467f958SSadaf Ebrahimi };
326*6467f958SSadaf Ebrahimi struct cl_int3
327*6467f958SSadaf Ebrahimi {
328*6467f958SSadaf Ebrahimi ::cl_int3 data;
329*6467f958SSadaf Ebrahimi };
330*6467f958SSadaf Ebrahimi struct cl_uint3
331*6467f958SSadaf Ebrahimi {
332*6467f958SSadaf Ebrahimi ::cl_uint3 data;
333*6467f958SSadaf Ebrahimi };
334*6467f958SSadaf Ebrahimi struct cl_long3
335*6467f958SSadaf Ebrahimi {
336*6467f958SSadaf Ebrahimi ::cl_long3 data;
337*6467f958SSadaf Ebrahimi };
338*6467f958SSadaf Ebrahimi struct cl_ulong3
339*6467f958SSadaf Ebrahimi {
340*6467f958SSadaf Ebrahimi ::cl_ulong3 data;
341*6467f958SSadaf Ebrahimi };
342*6467f958SSadaf Ebrahimi struct cl_float3
343*6467f958SSadaf Ebrahimi {
344*6467f958SSadaf Ebrahimi ::cl_float3 data;
345*6467f958SSadaf Ebrahimi };
346*6467f958SSadaf Ebrahimi struct cl_double3
347*6467f958SSadaf Ebrahimi {
348*6467f958SSadaf Ebrahimi ::cl_double3 data;
349*6467f958SSadaf Ebrahimi };
350*6467f958SSadaf Ebrahimi struct cl_half
351*6467f958SSadaf Ebrahimi {
352*6467f958SSadaf Ebrahimi ::cl_half data;
353*6467f958SSadaf Ebrahimi };
354*6467f958SSadaf Ebrahimi struct cl_half2
355*6467f958SSadaf Ebrahimi {
356*6467f958SSadaf Ebrahimi ::cl_half2 data;
357*6467f958SSadaf Ebrahimi };
358*6467f958SSadaf Ebrahimi struct cl_half3
359*6467f958SSadaf Ebrahimi {
360*6467f958SSadaf Ebrahimi ::cl_half3 data;
361*6467f958SSadaf Ebrahimi };
362*6467f958SSadaf Ebrahimi struct cl_half4
363*6467f958SSadaf Ebrahimi {
364*6467f958SSadaf Ebrahimi ::cl_half4 data;
365*6467f958SSadaf Ebrahimi };
366*6467f958SSadaf Ebrahimi struct cl_half8
367*6467f958SSadaf Ebrahimi {
368*6467f958SSadaf Ebrahimi ::cl_half8 data;
369*6467f958SSadaf Ebrahimi };
370*6467f958SSadaf Ebrahimi struct cl_half16
371*6467f958SSadaf Ebrahimi {
372*6467f958SSadaf Ebrahimi ::cl_half16 data;
373*6467f958SSadaf Ebrahimi };
374*6467f958SSadaf Ebrahimi }
375*6467f958SSadaf Ebrahimi
376*6467f958SSadaf Ebrahimi // Declare operator<< for cl_ types, accessing the .s member.
377*6467f958SSadaf Ebrahimi #define OP_OSTREAM(Ty, VecSize) \
378*6467f958SSadaf Ebrahimi std::ostream &operator<<(std::ostream &os, const Ty##VecSize &val);
379*6467f958SSadaf Ebrahimi
380*6467f958SSadaf Ebrahimi // Declare operator<< for subgroups::cl_ types, accessing the .data member and
381*6467f958SSadaf Ebrahimi // forwarding to operator<< for the cl_ types.
382*6467f958SSadaf Ebrahimi #define OP_OSTREAM_SUBGROUP(Ty, VecSize) \
383*6467f958SSadaf Ebrahimi std::ostream &operator<<(std::ostream &os, const Ty##VecSize &val);
384*6467f958SSadaf Ebrahimi
385*6467f958SSadaf Ebrahimi // Declare operator<< for all vector sizes.
386*6467f958SSadaf Ebrahimi #define OP_OSTREAM_ALL_VEC(Ty) \
387*6467f958SSadaf Ebrahimi OP_OSTREAM(Ty, 2) \
388*6467f958SSadaf Ebrahimi OP_OSTREAM(Ty, 4) \
389*6467f958SSadaf Ebrahimi OP_OSTREAM(Ty, 8) \
390*6467f958SSadaf Ebrahimi OP_OSTREAM(Ty, 16) \
391*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::Ty, 3)
392*6467f958SSadaf Ebrahimi
393*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_char)
OP_OSTREAM_ALL_VEC(cl_uchar)394*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_uchar)
395*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_short)
396*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_ushort)
397*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_int)
398*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_uint)
399*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_long)
400*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_ulong)
401*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_float)
402*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_double)
403*6467f958SSadaf Ebrahimi OP_OSTREAM_ALL_VEC(cl_half)
404*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::cl_half, )
405*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::cl_half, 2)
406*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::cl_half, 4)
407*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::cl_half, 8)
408*6467f958SSadaf Ebrahimi OP_OSTREAM_SUBGROUP(subgroups::cl_half, 16)
409*6467f958SSadaf Ebrahimi
410*6467f958SSadaf Ebrahimi #undef OP_OSTREAM
411*6467f958SSadaf Ebrahimi #undef OP_OSTREAM_SUBGROUP
412*6467f958SSadaf Ebrahimi #undef OP_OSTREAM_ALL_VEC
413*6467f958SSadaf Ebrahimi
414*6467f958SSadaf Ebrahimi template <typename Ty>
415*6467f958SSadaf Ebrahimi std::string print_expected_obtained(const Ty &expected, const Ty &obtained)
416*6467f958SSadaf Ebrahimi {
417*6467f958SSadaf Ebrahimi std::ostringstream oss;
418*6467f958SSadaf Ebrahimi oss << "Expected: " << expected << " Obtained: " << obtained;
419*6467f958SSadaf Ebrahimi return oss.str();
420*6467f958SSadaf Ebrahimi }
421*6467f958SSadaf Ebrahimi
int64_ok(cl_device_id device)422*6467f958SSadaf Ebrahimi static bool int64_ok(cl_device_id device)
423*6467f958SSadaf Ebrahimi {
424*6467f958SSadaf Ebrahimi char profile[128];
425*6467f958SSadaf Ebrahimi int error;
426*6467f958SSadaf Ebrahimi
427*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
428*6467f958SSadaf Ebrahimi (void *)&profile, NULL);
429*6467f958SSadaf Ebrahimi if (error)
430*6467f958SSadaf Ebrahimi {
431*6467f958SSadaf Ebrahimi log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
432*6467f958SSadaf Ebrahimi return false;
433*6467f958SSadaf Ebrahimi }
434*6467f958SSadaf Ebrahimi
435*6467f958SSadaf Ebrahimi if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
436*6467f958SSadaf Ebrahimi return is_extension_available(device, "cles_khr_int64");
437*6467f958SSadaf Ebrahimi
438*6467f958SSadaf Ebrahimi return true;
439*6467f958SSadaf Ebrahimi }
440*6467f958SSadaf Ebrahimi
double_ok(cl_device_id device)441*6467f958SSadaf Ebrahimi static bool double_ok(cl_device_id device)
442*6467f958SSadaf Ebrahimi {
443*6467f958SSadaf Ebrahimi int error;
444*6467f958SSadaf Ebrahimi cl_device_fp_config c;
445*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
446*6467f958SSadaf Ebrahimi (void *)&c, NULL);
447*6467f958SSadaf Ebrahimi if (error)
448*6467f958SSadaf Ebrahimi {
449*6467f958SSadaf Ebrahimi log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
450*6467f958SSadaf Ebrahimi return false;
451*6467f958SSadaf Ebrahimi }
452*6467f958SSadaf Ebrahimi return c != 0;
453*6467f958SSadaf Ebrahimi }
454*6467f958SSadaf Ebrahimi
half_ok(cl_device_id device)455*6467f958SSadaf Ebrahimi static bool half_ok(cl_device_id device)
456*6467f958SSadaf Ebrahimi {
457*6467f958SSadaf Ebrahimi int error;
458*6467f958SSadaf Ebrahimi cl_device_fp_config c;
459*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(c),
460*6467f958SSadaf Ebrahimi (void *)&c, NULL);
461*6467f958SSadaf Ebrahimi if (error)
462*6467f958SSadaf Ebrahimi {
463*6467f958SSadaf Ebrahimi log_info("clGetDeviceInfo failed with CL_DEVICE_HALF_FP_CONFIG\n");
464*6467f958SSadaf Ebrahimi return false;
465*6467f958SSadaf Ebrahimi }
466*6467f958SSadaf Ebrahimi return c != 0;
467*6467f958SSadaf Ebrahimi }
468*6467f958SSadaf Ebrahimi
469*6467f958SSadaf Ebrahimi template <typename Ty> struct CommonTypeManager
470*6467f958SSadaf Ebrahimi {
471*6467f958SSadaf Ebrahimi
nameCommonTypeManager472*6467f958SSadaf Ebrahimi static const char *name() { return ""; }
add_typedefCommonTypeManager473*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "\n"; }
474*6467f958SSadaf Ebrahimi typedef std::false_type is_vector_type;
475*6467f958SSadaf Ebrahimi typedef std::false_type is_sb_vector_size3;
476*6467f958SSadaf Ebrahimi typedef std::false_type is_sb_vector_type;
477*6467f958SSadaf Ebrahimi typedef std::false_type is_sb_scalar_type;
type_supportedCommonTypeManager478*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id) { return true; }
identify_limitsCommonTypeManager479*6467f958SSadaf Ebrahimi static const Ty identify_limits(ArithmeticOp operation)
480*6467f958SSadaf Ebrahimi {
481*6467f958SSadaf Ebrahimi switch (operation)
482*6467f958SSadaf Ebrahimi {
483*6467f958SSadaf Ebrahimi case ArithmeticOp::add_: return (Ty)0;
484*6467f958SSadaf Ebrahimi case ArithmeticOp::max_: return (std::numeric_limits<Ty>::min)();
485*6467f958SSadaf Ebrahimi case ArithmeticOp::min_: return (std::numeric_limits<Ty>::max)();
486*6467f958SSadaf Ebrahimi case ArithmeticOp::mul_: return (Ty)1;
487*6467f958SSadaf Ebrahimi case ArithmeticOp::and_: return (Ty)~0;
488*6467f958SSadaf Ebrahimi case ArithmeticOp::or_: return (Ty)0;
489*6467f958SSadaf Ebrahimi case ArithmeticOp::xor_: return (Ty)0;
490*6467f958SSadaf Ebrahimi default: log_error("Unknown operation request\n"); break;
491*6467f958SSadaf Ebrahimi }
492*6467f958SSadaf Ebrahimi return 0;
493*6467f958SSadaf Ebrahimi }
494*6467f958SSadaf Ebrahimi };
495*6467f958SSadaf Ebrahimi
496*6467f958SSadaf Ebrahimi template <typename> struct TypeManager;
497*6467f958SSadaf Ebrahimi
498*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_int> : public CommonTypeManager<cl_int>
499*6467f958SSadaf Ebrahimi {
500*6467f958SSadaf Ebrahimi static const char *name() { return "int"; }
501*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int Type;\n"; }
502*6467f958SSadaf Ebrahimi static cl_int identify_limits(ArithmeticOp operation)
503*6467f958SSadaf Ebrahimi {
504*6467f958SSadaf Ebrahimi switch (operation)
505*6467f958SSadaf Ebrahimi {
506*6467f958SSadaf Ebrahimi case ArithmeticOp::add_: return (cl_int)0;
507*6467f958SSadaf Ebrahimi case ArithmeticOp::max_:
508*6467f958SSadaf Ebrahimi return (std::numeric_limits<cl_int>::min)();
509*6467f958SSadaf Ebrahimi case ArithmeticOp::min_:
510*6467f958SSadaf Ebrahimi return (std::numeric_limits<cl_int>::max)();
511*6467f958SSadaf Ebrahimi case ArithmeticOp::mul_: return (cl_int)1;
512*6467f958SSadaf Ebrahimi case ArithmeticOp::and_: return (cl_int)~0;
513*6467f958SSadaf Ebrahimi case ArithmeticOp::or_: return (cl_int)0;
514*6467f958SSadaf Ebrahimi case ArithmeticOp::xor_: return (cl_int)0;
515*6467f958SSadaf Ebrahimi case ArithmeticOp::logical_and: return (cl_int)1;
516*6467f958SSadaf Ebrahimi case ArithmeticOp::logical_or: return (cl_int)0;
517*6467f958SSadaf Ebrahimi case ArithmeticOp::logical_xor: return (cl_int)0;
518*6467f958SSadaf Ebrahimi default: log_error("Unknown operation request\n"); break;
519*6467f958SSadaf Ebrahimi }
520*6467f958SSadaf Ebrahimi return 0;
521*6467f958SSadaf Ebrahimi }
522*6467f958SSadaf Ebrahimi };
523*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_int2> : public CommonTypeManager<cl_int2>
524*6467f958SSadaf Ebrahimi {
525*6467f958SSadaf Ebrahimi static const char *name() { return "int2"; }
526*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int2 Type;\n"; }
527*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
528*6467f958SSadaf Ebrahimi using scalar_type = cl_int;
529*6467f958SSadaf Ebrahimi };
530*6467f958SSadaf Ebrahimi template <>
531*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_int3>
532*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_int3>
533*6467f958SSadaf Ebrahimi {
534*6467f958SSadaf Ebrahimi static const char *name() { return "int3"; }
535*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int3 Type;\n"; }
536*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
537*6467f958SSadaf Ebrahimi using scalar_type = cl_int;
538*6467f958SSadaf Ebrahimi };
539*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_int4> : public CommonTypeManager<cl_int4>
540*6467f958SSadaf Ebrahimi {
541*6467f958SSadaf Ebrahimi static const char *name() { return "int4"; }
542*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int4 Type;\n"; }
543*6467f958SSadaf Ebrahimi using scalar_type = cl_int;
544*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
545*6467f958SSadaf Ebrahimi };
546*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_int8> : public CommonTypeManager<cl_int8>
547*6467f958SSadaf Ebrahimi {
548*6467f958SSadaf Ebrahimi static const char *name() { return "int8"; }
549*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int8 Type;\n"; }
550*6467f958SSadaf Ebrahimi using scalar_type = cl_int;
551*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
552*6467f958SSadaf Ebrahimi };
553*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_int16> : public CommonTypeManager<cl_int16>
554*6467f958SSadaf Ebrahimi {
555*6467f958SSadaf Ebrahimi static const char *name() { return "int16"; }
556*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef int16 Type;\n"; }
557*6467f958SSadaf Ebrahimi using scalar_type = cl_int;
558*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
559*6467f958SSadaf Ebrahimi };
560*6467f958SSadaf Ebrahimi // cl_uint
561*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uint> : public CommonTypeManager<cl_uint>
562*6467f958SSadaf Ebrahimi {
563*6467f958SSadaf Ebrahimi static const char *name() { return "uint"; }
564*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint Type;\n"; }
565*6467f958SSadaf Ebrahimi };
566*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uint2> : public CommonTypeManager<cl_uint2>
567*6467f958SSadaf Ebrahimi {
568*6467f958SSadaf Ebrahimi static const char *name() { return "uint2"; }
569*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint2 Type;\n"; }
570*6467f958SSadaf Ebrahimi using scalar_type = cl_uint;
571*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
572*6467f958SSadaf Ebrahimi };
573*6467f958SSadaf Ebrahimi template <>
574*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_uint3>
575*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_uint3>
576*6467f958SSadaf Ebrahimi {
577*6467f958SSadaf Ebrahimi static const char *name() { return "uint3"; }
578*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint3 Type;\n"; }
579*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
580*6467f958SSadaf Ebrahimi using scalar_type = cl_uint;
581*6467f958SSadaf Ebrahimi };
582*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uint4> : public CommonTypeManager<cl_uint4>
583*6467f958SSadaf Ebrahimi {
584*6467f958SSadaf Ebrahimi static const char *name() { return "uint4"; }
585*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint4 Type;\n"; }
586*6467f958SSadaf Ebrahimi using scalar_type = cl_uint;
587*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
588*6467f958SSadaf Ebrahimi };
589*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uint8> : public CommonTypeManager<cl_uint8>
590*6467f958SSadaf Ebrahimi {
591*6467f958SSadaf Ebrahimi static const char *name() { return "uint8"; }
592*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint8 Type;\n"; }
593*6467f958SSadaf Ebrahimi using scalar_type = cl_uint;
594*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
595*6467f958SSadaf Ebrahimi };
596*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uint16> : public CommonTypeManager<cl_uint16>
597*6467f958SSadaf Ebrahimi {
598*6467f958SSadaf Ebrahimi static const char *name() { return "uint16"; }
599*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uint16 Type;\n"; }
600*6467f958SSadaf Ebrahimi using scalar_type = cl_uint;
601*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
602*6467f958SSadaf Ebrahimi };
603*6467f958SSadaf Ebrahimi // cl_short
604*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_short> : public CommonTypeManager<cl_short>
605*6467f958SSadaf Ebrahimi {
606*6467f958SSadaf Ebrahimi static const char *name() { return "short"; }
607*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short Type;\n"; }
608*6467f958SSadaf Ebrahimi };
609*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_short2> : public CommonTypeManager<cl_short2>
610*6467f958SSadaf Ebrahimi {
611*6467f958SSadaf Ebrahimi static const char *name() { return "short2"; }
612*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short2 Type;\n"; }
613*6467f958SSadaf Ebrahimi using scalar_type = cl_short;
614*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
615*6467f958SSadaf Ebrahimi };
616*6467f958SSadaf Ebrahimi template <>
617*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_short3>
618*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_short3>
619*6467f958SSadaf Ebrahimi {
620*6467f958SSadaf Ebrahimi static const char *name() { return "short3"; }
621*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short3 Type;\n"; }
622*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
623*6467f958SSadaf Ebrahimi using scalar_type = cl_short;
624*6467f958SSadaf Ebrahimi };
625*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_short4> : public CommonTypeManager<cl_short4>
626*6467f958SSadaf Ebrahimi {
627*6467f958SSadaf Ebrahimi static const char *name() { return "short4"; }
628*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short4 Type;\n"; }
629*6467f958SSadaf Ebrahimi using scalar_type = cl_short;
630*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
631*6467f958SSadaf Ebrahimi };
632*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_short8> : public CommonTypeManager<cl_short8>
633*6467f958SSadaf Ebrahimi {
634*6467f958SSadaf Ebrahimi static const char *name() { return "short8"; }
635*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short8 Type;\n"; }
636*6467f958SSadaf Ebrahimi using scalar_type = cl_short;
637*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
638*6467f958SSadaf Ebrahimi };
639*6467f958SSadaf Ebrahimi template <>
640*6467f958SSadaf Ebrahimi struct TypeManager<cl_short16> : public CommonTypeManager<cl_short16>
641*6467f958SSadaf Ebrahimi {
642*6467f958SSadaf Ebrahimi static const char *name() { return "short16"; }
643*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef short16 Type;\n"; }
644*6467f958SSadaf Ebrahimi using scalar_type = cl_short;
645*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
646*6467f958SSadaf Ebrahimi };
647*6467f958SSadaf Ebrahimi // cl_ushort
648*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_ushort> : public CommonTypeManager<cl_ushort>
649*6467f958SSadaf Ebrahimi {
650*6467f958SSadaf Ebrahimi static const char *name() { return "ushort"; }
651*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort Type;\n"; }
652*6467f958SSadaf Ebrahimi };
653*6467f958SSadaf Ebrahimi template <>
654*6467f958SSadaf Ebrahimi struct TypeManager<cl_ushort2> : public CommonTypeManager<cl_ushort2>
655*6467f958SSadaf Ebrahimi {
656*6467f958SSadaf Ebrahimi static const char *name() { return "ushort2"; }
657*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort2 Type;\n"; }
658*6467f958SSadaf Ebrahimi using scalar_type = cl_ushort;
659*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
660*6467f958SSadaf Ebrahimi };
661*6467f958SSadaf Ebrahimi template <>
662*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_ushort3>
663*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_ushort3>
664*6467f958SSadaf Ebrahimi {
665*6467f958SSadaf Ebrahimi static const char *name() { return "ushort3"; }
666*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort3 Type;\n"; }
667*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
668*6467f958SSadaf Ebrahimi using scalar_type = cl_ushort;
669*6467f958SSadaf Ebrahimi };
670*6467f958SSadaf Ebrahimi template <>
671*6467f958SSadaf Ebrahimi struct TypeManager<cl_ushort4> : public CommonTypeManager<cl_ushort4>
672*6467f958SSadaf Ebrahimi {
673*6467f958SSadaf Ebrahimi static const char *name() { return "ushort4"; }
674*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort4 Type;\n"; }
675*6467f958SSadaf Ebrahimi using scalar_type = cl_ushort;
676*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
677*6467f958SSadaf Ebrahimi };
678*6467f958SSadaf Ebrahimi template <>
679*6467f958SSadaf Ebrahimi struct TypeManager<cl_ushort8> : public CommonTypeManager<cl_ushort8>
680*6467f958SSadaf Ebrahimi {
681*6467f958SSadaf Ebrahimi static const char *name() { return "ushort8"; }
682*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort8 Type;\n"; }
683*6467f958SSadaf Ebrahimi using scalar_type = cl_ushort;
684*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
685*6467f958SSadaf Ebrahimi };
686*6467f958SSadaf Ebrahimi template <>
687*6467f958SSadaf Ebrahimi struct TypeManager<cl_ushort16> : public CommonTypeManager<cl_ushort16>
688*6467f958SSadaf Ebrahimi {
689*6467f958SSadaf Ebrahimi static const char *name() { return "ushort16"; }
690*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ushort16 Type;\n"; }
691*6467f958SSadaf Ebrahimi using scalar_type = cl_ushort;
692*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
693*6467f958SSadaf Ebrahimi };
694*6467f958SSadaf Ebrahimi // cl_char
695*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_char> : public CommonTypeManager<cl_char>
696*6467f958SSadaf Ebrahimi {
697*6467f958SSadaf Ebrahimi static const char *name() { return "char"; }
698*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char Type;\n"; }
699*6467f958SSadaf Ebrahimi };
700*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_char2> : public CommonTypeManager<cl_char2>
701*6467f958SSadaf Ebrahimi {
702*6467f958SSadaf Ebrahimi static const char *name() { return "char2"; }
703*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char2 Type;\n"; }
704*6467f958SSadaf Ebrahimi using scalar_type = cl_char;
705*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
706*6467f958SSadaf Ebrahimi };
707*6467f958SSadaf Ebrahimi template <>
708*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_char3>
709*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_char3>
710*6467f958SSadaf Ebrahimi {
711*6467f958SSadaf Ebrahimi static const char *name() { return "char3"; }
712*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char3 Type;\n"; }
713*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
714*6467f958SSadaf Ebrahimi using scalar_type = cl_char;
715*6467f958SSadaf Ebrahimi };
716*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_char4> : public CommonTypeManager<cl_char4>
717*6467f958SSadaf Ebrahimi {
718*6467f958SSadaf Ebrahimi static const char *name() { return "char4"; }
719*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char4 Type;\n"; }
720*6467f958SSadaf Ebrahimi using scalar_type = cl_char;
721*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
722*6467f958SSadaf Ebrahimi };
723*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_char8> : public CommonTypeManager<cl_char8>
724*6467f958SSadaf Ebrahimi {
725*6467f958SSadaf Ebrahimi static const char *name() { return "char8"; }
726*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char8 Type;\n"; }
727*6467f958SSadaf Ebrahimi using scalar_type = cl_char;
728*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
729*6467f958SSadaf Ebrahimi };
730*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_char16> : public CommonTypeManager<cl_char16>
731*6467f958SSadaf Ebrahimi {
732*6467f958SSadaf Ebrahimi static const char *name() { return "char16"; }
733*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef char16 Type;\n"; }
734*6467f958SSadaf Ebrahimi using scalar_type = cl_char;
735*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
736*6467f958SSadaf Ebrahimi };
737*6467f958SSadaf Ebrahimi // cl_uchar
738*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uchar> : public CommonTypeManager<cl_uchar>
739*6467f958SSadaf Ebrahimi {
740*6467f958SSadaf Ebrahimi static const char *name() { return "uchar"; }
741*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar Type;\n"; }
742*6467f958SSadaf Ebrahimi };
743*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uchar2> : public CommonTypeManager<cl_uchar2>
744*6467f958SSadaf Ebrahimi {
745*6467f958SSadaf Ebrahimi static const char *name() { return "uchar2"; }
746*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar2 Type;\n"; }
747*6467f958SSadaf Ebrahimi using scalar_type = cl_uchar;
748*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
749*6467f958SSadaf Ebrahimi };
750*6467f958SSadaf Ebrahimi template <>
751*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_uchar3>
752*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_char3>
753*6467f958SSadaf Ebrahimi {
754*6467f958SSadaf Ebrahimi static const char *name() { return "uchar3"; }
755*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar3 Type;\n"; }
756*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
757*6467f958SSadaf Ebrahimi using scalar_type = cl_uchar;
758*6467f958SSadaf Ebrahimi };
759*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uchar4> : public CommonTypeManager<cl_uchar4>
760*6467f958SSadaf Ebrahimi {
761*6467f958SSadaf Ebrahimi static const char *name() { return "uchar4"; }
762*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar4 Type;\n"; }
763*6467f958SSadaf Ebrahimi using scalar_type = cl_uchar;
764*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
765*6467f958SSadaf Ebrahimi };
766*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_uchar8> : public CommonTypeManager<cl_uchar8>
767*6467f958SSadaf Ebrahimi {
768*6467f958SSadaf Ebrahimi static const char *name() { return "uchar8"; }
769*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar8 Type;\n"; }
770*6467f958SSadaf Ebrahimi using scalar_type = cl_uchar;
771*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
772*6467f958SSadaf Ebrahimi };
773*6467f958SSadaf Ebrahimi template <>
774*6467f958SSadaf Ebrahimi struct TypeManager<cl_uchar16> : public CommonTypeManager<cl_uchar16>
775*6467f958SSadaf Ebrahimi {
776*6467f958SSadaf Ebrahimi static const char *name() { return "uchar16"; }
777*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef uchar16 Type;\n"; }
778*6467f958SSadaf Ebrahimi using scalar_type = cl_uchar;
779*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
780*6467f958SSadaf Ebrahimi };
781*6467f958SSadaf Ebrahimi // cl_long
782*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_long> : public CommonTypeManager<cl_long>
783*6467f958SSadaf Ebrahimi {
784*6467f958SSadaf Ebrahimi static const char *name() { return "long"; }
785*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long Type;\n"; }
786*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
787*6467f958SSadaf Ebrahimi {
788*6467f958SSadaf Ebrahimi return int64_ok(device);
789*6467f958SSadaf Ebrahimi }
790*6467f958SSadaf Ebrahimi };
791*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_long2> : public CommonTypeManager<cl_long2>
792*6467f958SSadaf Ebrahimi {
793*6467f958SSadaf Ebrahimi static const char *name() { return "long2"; }
794*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long2 Type;\n"; }
795*6467f958SSadaf Ebrahimi using scalar_type = cl_long;
796*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
797*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
798*6467f958SSadaf Ebrahimi {
799*6467f958SSadaf Ebrahimi return int64_ok(device);
800*6467f958SSadaf Ebrahimi }
801*6467f958SSadaf Ebrahimi };
802*6467f958SSadaf Ebrahimi template <>
803*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_long3>
804*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_long3>
805*6467f958SSadaf Ebrahimi {
806*6467f958SSadaf Ebrahimi static const char *name() { return "long3"; }
807*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long3 Type;\n"; }
808*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
809*6467f958SSadaf Ebrahimi using scalar_type = cl_long;
810*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
811*6467f958SSadaf Ebrahimi {
812*6467f958SSadaf Ebrahimi return int64_ok(device);
813*6467f958SSadaf Ebrahimi }
814*6467f958SSadaf Ebrahimi };
815*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_long4> : public CommonTypeManager<cl_long4>
816*6467f958SSadaf Ebrahimi {
817*6467f958SSadaf Ebrahimi static const char *name() { return "long4"; }
818*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long4 Type;\n"; }
819*6467f958SSadaf Ebrahimi using scalar_type = cl_long;
820*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
821*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
822*6467f958SSadaf Ebrahimi {
823*6467f958SSadaf Ebrahimi return int64_ok(device);
824*6467f958SSadaf Ebrahimi }
825*6467f958SSadaf Ebrahimi };
826*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_long8> : public CommonTypeManager<cl_long8>
827*6467f958SSadaf Ebrahimi {
828*6467f958SSadaf Ebrahimi static const char *name() { return "long8"; }
829*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long8 Type;\n"; }
830*6467f958SSadaf Ebrahimi using scalar_type = cl_long;
831*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
832*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
833*6467f958SSadaf Ebrahimi {
834*6467f958SSadaf Ebrahimi return int64_ok(device);
835*6467f958SSadaf Ebrahimi }
836*6467f958SSadaf Ebrahimi };
837*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_long16> : public CommonTypeManager<cl_long16>
838*6467f958SSadaf Ebrahimi {
839*6467f958SSadaf Ebrahimi static const char *name() { return "long16"; }
840*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef long16 Type;\n"; }
841*6467f958SSadaf Ebrahimi using scalar_type = cl_long;
842*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
843*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
844*6467f958SSadaf Ebrahimi {
845*6467f958SSadaf Ebrahimi return int64_ok(device);
846*6467f958SSadaf Ebrahimi }
847*6467f958SSadaf Ebrahimi };
848*6467f958SSadaf Ebrahimi // cl_ulong
849*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_ulong> : public CommonTypeManager<cl_ulong>
850*6467f958SSadaf Ebrahimi {
851*6467f958SSadaf Ebrahimi static const char *name() { return "ulong"; }
852*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong Type;\n"; }
853*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
854*6467f958SSadaf Ebrahimi {
855*6467f958SSadaf Ebrahimi return int64_ok(device);
856*6467f958SSadaf Ebrahimi }
857*6467f958SSadaf Ebrahimi };
858*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_ulong2> : public CommonTypeManager<cl_ulong2>
859*6467f958SSadaf Ebrahimi {
860*6467f958SSadaf Ebrahimi static const char *name() { return "ulong2"; }
861*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong2 Type;\n"; }
862*6467f958SSadaf Ebrahimi using scalar_type = cl_ulong;
863*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
864*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
865*6467f958SSadaf Ebrahimi {
866*6467f958SSadaf Ebrahimi return int64_ok(device);
867*6467f958SSadaf Ebrahimi }
868*6467f958SSadaf Ebrahimi };
869*6467f958SSadaf Ebrahimi template <>
870*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_ulong3>
871*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_ulong3>
872*6467f958SSadaf Ebrahimi {
873*6467f958SSadaf Ebrahimi static const char *name() { return "ulong3"; }
874*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong3 Type;\n"; }
875*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
876*6467f958SSadaf Ebrahimi using scalar_type = cl_ulong;
877*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
878*6467f958SSadaf Ebrahimi {
879*6467f958SSadaf Ebrahimi return int64_ok(device);
880*6467f958SSadaf Ebrahimi }
881*6467f958SSadaf Ebrahimi };
882*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_ulong4> : public CommonTypeManager<cl_ulong4>
883*6467f958SSadaf Ebrahimi {
884*6467f958SSadaf Ebrahimi static const char *name() { return "ulong4"; }
885*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong4 Type;\n"; }
886*6467f958SSadaf Ebrahimi using scalar_type = cl_ulong;
887*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
888*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
889*6467f958SSadaf Ebrahimi {
890*6467f958SSadaf Ebrahimi return int64_ok(device);
891*6467f958SSadaf Ebrahimi }
892*6467f958SSadaf Ebrahimi };
893*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_ulong8> : public CommonTypeManager<cl_ulong8>
894*6467f958SSadaf Ebrahimi {
895*6467f958SSadaf Ebrahimi static const char *name() { return "ulong8"; }
896*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong8 Type;\n"; }
897*6467f958SSadaf Ebrahimi using scalar_type = cl_ulong;
898*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
899*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
900*6467f958SSadaf Ebrahimi {
901*6467f958SSadaf Ebrahimi return int64_ok(device);
902*6467f958SSadaf Ebrahimi }
903*6467f958SSadaf Ebrahimi };
904*6467f958SSadaf Ebrahimi template <>
905*6467f958SSadaf Ebrahimi struct TypeManager<cl_ulong16> : public CommonTypeManager<cl_ulong16>
906*6467f958SSadaf Ebrahimi {
907*6467f958SSadaf Ebrahimi static const char *name() { return "ulong16"; }
908*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef ulong16 Type;\n"; }
909*6467f958SSadaf Ebrahimi using scalar_type = cl_ulong;
910*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
911*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
912*6467f958SSadaf Ebrahimi {
913*6467f958SSadaf Ebrahimi return int64_ok(device);
914*6467f958SSadaf Ebrahimi }
915*6467f958SSadaf Ebrahimi };
916*6467f958SSadaf Ebrahimi
917*6467f958SSadaf Ebrahimi // cl_float
918*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_float> : public CommonTypeManager<cl_float>
919*6467f958SSadaf Ebrahimi {
920*6467f958SSadaf Ebrahimi static const char *name() { return "float"; }
921*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float Type;\n"; }
922*6467f958SSadaf Ebrahimi static cl_float identify_limits(ArithmeticOp operation)
923*6467f958SSadaf Ebrahimi {
924*6467f958SSadaf Ebrahimi switch (operation)
925*6467f958SSadaf Ebrahimi {
926*6467f958SSadaf Ebrahimi case ArithmeticOp::add_: return 0.0f;
927*6467f958SSadaf Ebrahimi case ArithmeticOp::max_:
928*6467f958SSadaf Ebrahimi return -std::numeric_limits<float>::infinity();
929*6467f958SSadaf Ebrahimi case ArithmeticOp::min_:
930*6467f958SSadaf Ebrahimi return std::numeric_limits<float>::infinity();
931*6467f958SSadaf Ebrahimi case ArithmeticOp::mul_: return (cl_float)1;
932*6467f958SSadaf Ebrahimi default: log_error("Unknown operation request\n"); break;
933*6467f958SSadaf Ebrahimi }
934*6467f958SSadaf Ebrahimi return 0;
935*6467f958SSadaf Ebrahimi }
936*6467f958SSadaf Ebrahimi };
937*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_float2> : public CommonTypeManager<cl_float2>
938*6467f958SSadaf Ebrahimi {
939*6467f958SSadaf Ebrahimi static const char *name() { return "float2"; }
940*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float2 Type;\n"; }
941*6467f958SSadaf Ebrahimi using scalar_type = cl_float;
942*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
943*6467f958SSadaf Ebrahimi };
944*6467f958SSadaf Ebrahimi template <>
945*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_float3>
946*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_float3>
947*6467f958SSadaf Ebrahimi {
948*6467f958SSadaf Ebrahimi static const char *name() { return "float3"; }
949*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float3 Type;\n"; }
950*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
951*6467f958SSadaf Ebrahimi using scalar_type = cl_float;
952*6467f958SSadaf Ebrahimi };
953*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_float4> : public CommonTypeManager<cl_float4>
954*6467f958SSadaf Ebrahimi {
955*6467f958SSadaf Ebrahimi static const char *name() { return "float4"; }
956*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float4 Type;\n"; }
957*6467f958SSadaf Ebrahimi using scalar_type = cl_float;
958*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
959*6467f958SSadaf Ebrahimi };
960*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_float8> : public CommonTypeManager<cl_float8>
961*6467f958SSadaf Ebrahimi {
962*6467f958SSadaf Ebrahimi static const char *name() { return "float8"; }
963*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float8 Type;\n"; }
964*6467f958SSadaf Ebrahimi using scalar_type = cl_float;
965*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
966*6467f958SSadaf Ebrahimi };
967*6467f958SSadaf Ebrahimi template <>
968*6467f958SSadaf Ebrahimi struct TypeManager<cl_float16> : public CommonTypeManager<cl_float16>
969*6467f958SSadaf Ebrahimi {
970*6467f958SSadaf Ebrahimi static const char *name() { return "float16"; }
971*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef float16 Type;\n"; }
972*6467f958SSadaf Ebrahimi using scalar_type = cl_float;
973*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
974*6467f958SSadaf Ebrahimi };
975*6467f958SSadaf Ebrahimi
976*6467f958SSadaf Ebrahimi // cl_double
977*6467f958SSadaf Ebrahimi template <> struct TypeManager<cl_double> : public CommonTypeManager<cl_double>
978*6467f958SSadaf Ebrahimi {
979*6467f958SSadaf Ebrahimi static const char *name() { return "double"; }
980*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double Type;\n"; }
981*6467f958SSadaf Ebrahimi static cl_double identify_limits(ArithmeticOp operation)
982*6467f958SSadaf Ebrahimi {
983*6467f958SSadaf Ebrahimi switch (operation)
984*6467f958SSadaf Ebrahimi {
985*6467f958SSadaf Ebrahimi case ArithmeticOp::add_: return 0.0;
986*6467f958SSadaf Ebrahimi case ArithmeticOp::max_:
987*6467f958SSadaf Ebrahimi return -std::numeric_limits<double>::infinity();
988*6467f958SSadaf Ebrahimi case ArithmeticOp::min_:
989*6467f958SSadaf Ebrahimi return std::numeric_limits<double>::infinity();
990*6467f958SSadaf Ebrahimi case ArithmeticOp::mul_: return (cl_double)1;
991*6467f958SSadaf Ebrahimi default: log_error("Unknown operation request\n"); break;
992*6467f958SSadaf Ebrahimi }
993*6467f958SSadaf Ebrahimi return 0;
994*6467f958SSadaf Ebrahimi }
995*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
996*6467f958SSadaf Ebrahimi {
997*6467f958SSadaf Ebrahimi return double_ok(device);
998*6467f958SSadaf Ebrahimi }
999*6467f958SSadaf Ebrahimi };
1000*6467f958SSadaf Ebrahimi template <>
1001*6467f958SSadaf Ebrahimi struct TypeManager<cl_double2> : public CommonTypeManager<cl_double2>
1002*6467f958SSadaf Ebrahimi {
1003*6467f958SSadaf Ebrahimi static const char *name() { return "double2"; }
1004*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double2 Type;\n"; }
1005*6467f958SSadaf Ebrahimi using scalar_type = cl_double;
1006*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
1007*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1008*6467f958SSadaf Ebrahimi {
1009*6467f958SSadaf Ebrahimi return double_ok(device);
1010*6467f958SSadaf Ebrahimi }
1011*6467f958SSadaf Ebrahimi };
1012*6467f958SSadaf Ebrahimi template <>
1013*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_double3>
1014*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_double3>
1015*6467f958SSadaf Ebrahimi {
1016*6467f958SSadaf Ebrahimi static const char *name() { return "double3"; }
1017*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double3 Type;\n"; }
1018*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
1019*6467f958SSadaf Ebrahimi using scalar_type = cl_double;
1020*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1021*6467f958SSadaf Ebrahimi {
1022*6467f958SSadaf Ebrahimi return double_ok(device);
1023*6467f958SSadaf Ebrahimi }
1024*6467f958SSadaf Ebrahimi };
1025*6467f958SSadaf Ebrahimi template <>
1026*6467f958SSadaf Ebrahimi struct TypeManager<cl_double4> : public CommonTypeManager<cl_double4>
1027*6467f958SSadaf Ebrahimi {
1028*6467f958SSadaf Ebrahimi static const char *name() { return "double4"; }
1029*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double4 Type;\n"; }
1030*6467f958SSadaf Ebrahimi using scalar_type = cl_double;
1031*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
1032*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1033*6467f958SSadaf Ebrahimi {
1034*6467f958SSadaf Ebrahimi return double_ok(device);
1035*6467f958SSadaf Ebrahimi }
1036*6467f958SSadaf Ebrahimi };
1037*6467f958SSadaf Ebrahimi template <>
1038*6467f958SSadaf Ebrahimi struct TypeManager<cl_double8> : public CommonTypeManager<cl_double8>
1039*6467f958SSadaf Ebrahimi {
1040*6467f958SSadaf Ebrahimi static const char *name() { return "double8"; }
1041*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double8 Type;\n"; }
1042*6467f958SSadaf Ebrahimi using scalar_type = cl_double;
1043*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
1044*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1045*6467f958SSadaf Ebrahimi {
1046*6467f958SSadaf Ebrahimi return double_ok(device);
1047*6467f958SSadaf Ebrahimi }
1048*6467f958SSadaf Ebrahimi };
1049*6467f958SSadaf Ebrahimi template <>
1050*6467f958SSadaf Ebrahimi struct TypeManager<cl_double16> : public CommonTypeManager<cl_double16>
1051*6467f958SSadaf Ebrahimi {
1052*6467f958SSadaf Ebrahimi static const char *name() { return "double16"; }
1053*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef double16 Type;\n"; }
1054*6467f958SSadaf Ebrahimi using scalar_type = cl_double;
1055*6467f958SSadaf Ebrahimi typedef std::true_type is_vector_type;
1056*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1057*6467f958SSadaf Ebrahimi {
1058*6467f958SSadaf Ebrahimi return double_ok(device);
1059*6467f958SSadaf Ebrahimi }
1060*6467f958SSadaf Ebrahimi };
1061*6467f958SSadaf Ebrahimi
1062*6467f958SSadaf Ebrahimi // cl_half
1063*6467f958SSadaf Ebrahimi template <>
1064*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half>
1065*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half>
1066*6467f958SSadaf Ebrahimi {
1067*6467f958SSadaf Ebrahimi static const char *name() { return "half"; }
1068*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half Type;\n"; }
1069*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_scalar_type;
1070*6467f958SSadaf Ebrahimi static subgroups::cl_half identify_limits(ArithmeticOp operation)
1071*6467f958SSadaf Ebrahimi {
1072*6467f958SSadaf Ebrahimi switch (operation)
1073*6467f958SSadaf Ebrahimi {
1074*6467f958SSadaf Ebrahimi case ArithmeticOp::add_: return { 0x0000 };
1075*6467f958SSadaf Ebrahimi case ArithmeticOp::max_: return { 0xfc00 };
1076*6467f958SSadaf Ebrahimi case ArithmeticOp::min_: return { 0x7c00 };
1077*6467f958SSadaf Ebrahimi case ArithmeticOp::mul_: return { 0x3c00 };
1078*6467f958SSadaf Ebrahimi default: log_error("Unknown operation request\n"); break;
1079*6467f958SSadaf Ebrahimi }
1080*6467f958SSadaf Ebrahimi return { 0 };
1081*6467f958SSadaf Ebrahimi }
1082*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1083*6467f958SSadaf Ebrahimi {
1084*6467f958SSadaf Ebrahimi return half_ok(device);
1085*6467f958SSadaf Ebrahimi }
1086*6467f958SSadaf Ebrahimi };
1087*6467f958SSadaf Ebrahimi template <>
1088*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half2>
1089*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half2>
1090*6467f958SSadaf Ebrahimi {
1091*6467f958SSadaf Ebrahimi static const char *name() { return "half2"; }
1092*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half2 Type;\n"; }
1093*6467f958SSadaf Ebrahimi using scalar_type = subgroups::cl_half;
1094*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_type;
1095*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1096*6467f958SSadaf Ebrahimi {
1097*6467f958SSadaf Ebrahimi return half_ok(device);
1098*6467f958SSadaf Ebrahimi }
1099*6467f958SSadaf Ebrahimi };
1100*6467f958SSadaf Ebrahimi template <>
1101*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half3>
1102*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half3>
1103*6467f958SSadaf Ebrahimi {
1104*6467f958SSadaf Ebrahimi static const char *name() { return "half3"; }
1105*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half3 Type;\n"; }
1106*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_size3;
1107*6467f958SSadaf Ebrahimi using scalar_type = subgroups::cl_half;
1108*6467f958SSadaf Ebrahimi
1109*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1110*6467f958SSadaf Ebrahimi {
1111*6467f958SSadaf Ebrahimi return half_ok(device);
1112*6467f958SSadaf Ebrahimi }
1113*6467f958SSadaf Ebrahimi };
1114*6467f958SSadaf Ebrahimi template <>
1115*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half4>
1116*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half4>
1117*6467f958SSadaf Ebrahimi {
1118*6467f958SSadaf Ebrahimi static const char *name() { return "half4"; }
1119*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half4 Type;\n"; }
1120*6467f958SSadaf Ebrahimi using scalar_type = subgroups::cl_half;
1121*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_type;
1122*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1123*6467f958SSadaf Ebrahimi {
1124*6467f958SSadaf Ebrahimi return half_ok(device);
1125*6467f958SSadaf Ebrahimi }
1126*6467f958SSadaf Ebrahimi };
1127*6467f958SSadaf Ebrahimi template <>
1128*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half8>
1129*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half8>
1130*6467f958SSadaf Ebrahimi {
1131*6467f958SSadaf Ebrahimi static const char *name() { return "half8"; }
1132*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half8 Type;\n"; }
1133*6467f958SSadaf Ebrahimi using scalar_type = subgroups::cl_half;
1134*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_type;
1135*6467f958SSadaf Ebrahimi
1136*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1137*6467f958SSadaf Ebrahimi {
1138*6467f958SSadaf Ebrahimi return half_ok(device);
1139*6467f958SSadaf Ebrahimi }
1140*6467f958SSadaf Ebrahimi };
1141*6467f958SSadaf Ebrahimi template <>
1142*6467f958SSadaf Ebrahimi struct TypeManager<subgroups::cl_half16>
1143*6467f958SSadaf Ebrahimi : public CommonTypeManager<subgroups::cl_half16>
1144*6467f958SSadaf Ebrahimi {
1145*6467f958SSadaf Ebrahimi static const char *name() { return "half16"; }
1146*6467f958SSadaf Ebrahimi static const char *add_typedef() { return "typedef half16 Type;\n"; }
1147*6467f958SSadaf Ebrahimi using scalar_type = subgroups::cl_half;
1148*6467f958SSadaf Ebrahimi typedef std::true_type is_sb_vector_type;
1149*6467f958SSadaf Ebrahimi static const bool type_supported(cl_device_id device)
1150*6467f958SSadaf Ebrahimi {
1151*6467f958SSadaf Ebrahimi return half_ok(device);
1152*6467f958SSadaf Ebrahimi }
1153*6467f958SSadaf Ebrahimi };
1154*6467f958SSadaf Ebrahimi
1155*6467f958SSadaf Ebrahimi // set scalar value to vector of halfs
1156*6467f958SSadaf Ebrahimi template <typename Ty, int N = 0>
1157*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value>::type
1158*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const cl_ulong &rhs)
1159*6467f958SSadaf Ebrahimi {
1160*6467f958SSadaf Ebrahimi const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1161*6467f958SSadaf Ebrahimi for (auto i = 0; i < size; ++i)
1162*6467f958SSadaf Ebrahimi {
1163*6467f958SSadaf Ebrahimi lhs.data.s[i] = rhs;
1164*6467f958SSadaf Ebrahimi }
1165*6467f958SSadaf Ebrahimi }
1166*6467f958SSadaf Ebrahimi
1167*6467f958SSadaf Ebrahimi
1168*6467f958SSadaf Ebrahimi // set scalar value to vector
1169*6467f958SSadaf Ebrahimi template <typename Ty>
1170*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
1171*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const cl_ulong &rhs)
1172*6467f958SSadaf Ebrahimi {
1173*6467f958SSadaf Ebrahimi const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1174*6467f958SSadaf Ebrahimi for (auto i = 0; i < size; ++i)
1175*6467f958SSadaf Ebrahimi {
1176*6467f958SSadaf Ebrahimi lhs.s[i] = rhs;
1177*6467f958SSadaf Ebrahimi }
1178*6467f958SSadaf Ebrahimi }
1179*6467f958SSadaf Ebrahimi
1180*6467f958SSadaf Ebrahimi // set vector to vector value
1181*6467f958SSadaf Ebrahimi template <typename Ty>
1182*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
1183*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const Ty &rhs)
1184*6467f958SSadaf Ebrahimi {
1185*6467f958SSadaf Ebrahimi lhs = rhs;
1186*6467f958SSadaf Ebrahimi }
1187*6467f958SSadaf Ebrahimi
1188*6467f958SSadaf Ebrahimi // set scalar value to vector size 3
1189*6467f958SSadaf Ebrahimi template <typename Ty, int N = 0>
1190*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value>::type
1191*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const cl_ulong &rhs)
1192*6467f958SSadaf Ebrahimi {
1193*6467f958SSadaf Ebrahimi for (auto i = 0; i < 3; ++i)
1194*6467f958SSadaf Ebrahimi {
1195*6467f958SSadaf Ebrahimi lhs.data.s[i] = rhs;
1196*6467f958SSadaf Ebrahimi }
1197*6467f958SSadaf Ebrahimi }
1198*6467f958SSadaf Ebrahimi
1199*6467f958SSadaf Ebrahimi // set scalar value to scalar
1200*6467f958SSadaf Ebrahimi template <typename Ty>
1201*6467f958SSadaf Ebrahimi typename std::enable_if<std::is_scalar<Ty>::value>::type
1202*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const cl_ulong &rhs)
1203*6467f958SSadaf Ebrahimi {
1204*6467f958SSadaf Ebrahimi lhs = static_cast<Ty>(rhs);
1205*6467f958SSadaf Ebrahimi }
1206*6467f958SSadaf Ebrahimi
1207*6467f958SSadaf Ebrahimi // set scalar value to half scalar
1208*6467f958SSadaf Ebrahimi template <typename Ty>
1209*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value>::type
1210*6467f958SSadaf Ebrahimi set_value(Ty &lhs, const cl_ulong &rhs)
1211*6467f958SSadaf Ebrahimi {
1212*6467f958SSadaf Ebrahimi lhs.data = cl_half_from_float(static_cast<cl_float>(rhs), g_rounding_mode);
1213*6467f958SSadaf Ebrahimi }
1214*6467f958SSadaf Ebrahimi
1215*6467f958SSadaf Ebrahimi // compare for common vectors
1216*6467f958SSadaf Ebrahimi template <typename Ty>
1217*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_vector_type::value, bool>::type
1218*6467f958SSadaf Ebrahimi compare(const Ty &lhs, const Ty &rhs)
1219*6467f958SSadaf Ebrahimi {
1220*6467f958SSadaf Ebrahimi const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1221*6467f958SSadaf Ebrahimi for (auto i = 0; i < size; ++i)
1222*6467f958SSadaf Ebrahimi {
1223*6467f958SSadaf Ebrahimi if (lhs.s[i] != rhs.s[i])
1224*6467f958SSadaf Ebrahimi {
1225*6467f958SSadaf Ebrahimi return false;
1226*6467f958SSadaf Ebrahimi }
1227*6467f958SSadaf Ebrahimi }
1228*6467f958SSadaf Ebrahimi return true;
1229*6467f958SSadaf Ebrahimi }
1230*6467f958SSadaf Ebrahimi
1231*6467f958SSadaf Ebrahimi // compare for vectors 3
1232*6467f958SSadaf Ebrahimi template <typename Ty>
1233*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value, bool>::type
1234*6467f958SSadaf Ebrahimi compare(const Ty &lhs, const Ty &rhs)
1235*6467f958SSadaf Ebrahimi {
1236*6467f958SSadaf Ebrahimi for (auto i = 0; i < 3; ++i)
1237*6467f958SSadaf Ebrahimi {
1238*6467f958SSadaf Ebrahimi if (lhs.data.s[i] != rhs.data.s[i])
1239*6467f958SSadaf Ebrahimi {
1240*6467f958SSadaf Ebrahimi return false;
1241*6467f958SSadaf Ebrahimi }
1242*6467f958SSadaf Ebrahimi }
1243*6467f958SSadaf Ebrahimi return true;
1244*6467f958SSadaf Ebrahimi }
1245*6467f958SSadaf Ebrahimi
1246*6467f958SSadaf Ebrahimi // compare for half vectors
1247*6467f958SSadaf Ebrahimi template <typename Ty>
1248*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value, bool>::type
1249*6467f958SSadaf Ebrahimi compare(const Ty &lhs, const Ty &rhs)
1250*6467f958SSadaf Ebrahimi {
1251*6467f958SSadaf Ebrahimi const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1252*6467f958SSadaf Ebrahimi for (auto i = 0; i < size; ++i)
1253*6467f958SSadaf Ebrahimi {
1254*6467f958SSadaf Ebrahimi if (lhs.data.s[i] != rhs.data.s[i])
1255*6467f958SSadaf Ebrahimi {
1256*6467f958SSadaf Ebrahimi return false;
1257*6467f958SSadaf Ebrahimi }
1258*6467f958SSadaf Ebrahimi }
1259*6467f958SSadaf Ebrahimi return true;
1260*6467f958SSadaf Ebrahimi }
1261*6467f958SSadaf Ebrahimi
1262*6467f958SSadaf Ebrahimi // compare for scalars
1263*6467f958SSadaf Ebrahimi template <typename Ty>
1264*6467f958SSadaf Ebrahimi typename std::enable_if<std::is_scalar<Ty>::value, bool>::type
1265*6467f958SSadaf Ebrahimi compare(const Ty &lhs, const Ty &rhs)
1266*6467f958SSadaf Ebrahimi {
1267*6467f958SSadaf Ebrahimi return lhs == rhs;
1268*6467f958SSadaf Ebrahimi }
1269*6467f958SSadaf Ebrahimi
1270*6467f958SSadaf Ebrahimi // compare for scalar halfs
1271*6467f958SSadaf Ebrahimi template <typename Ty>
1272*6467f958SSadaf Ebrahimi typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value, bool>::type
1273*6467f958SSadaf Ebrahimi compare(const Ty &lhs, const Ty &rhs)
1274*6467f958SSadaf Ebrahimi {
1275*6467f958SSadaf Ebrahimi return lhs.data == rhs.data;
1276*6467f958SSadaf Ebrahimi }
1277*6467f958SSadaf Ebrahimi
1278*6467f958SSadaf Ebrahimi template <typename Ty> inline bool compare_ordered(const Ty &lhs, const Ty &rhs)
1279*6467f958SSadaf Ebrahimi {
1280*6467f958SSadaf Ebrahimi return lhs == rhs;
1281*6467f958SSadaf Ebrahimi }
1282*6467f958SSadaf Ebrahimi
1283*6467f958SSadaf Ebrahimi template <>
1284*6467f958SSadaf Ebrahimi inline bool compare_ordered(const subgroups::cl_half &lhs,
1285*6467f958SSadaf Ebrahimi const subgroups::cl_half &rhs)
1286*6467f958SSadaf Ebrahimi {
1287*6467f958SSadaf Ebrahimi return cl_half_to_float(lhs.data) == cl_half_to_float(rhs.data);
1288*6467f958SSadaf Ebrahimi }
1289*6467f958SSadaf Ebrahimi
1290*6467f958SSadaf Ebrahimi template <typename Ty>
1291*6467f958SSadaf Ebrahimi inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs)
1292*6467f958SSadaf Ebrahimi {
1293*6467f958SSadaf Ebrahimi return cl_half_to_float(lhs.data) == rhs;
1294*6467f958SSadaf Ebrahimi }
1295*6467f958SSadaf Ebrahimi
1296*6467f958SSadaf Ebrahimi template <typename Ty, typename Fns> class KernelExecutor {
1297*6467f958SSadaf Ebrahimi public:
1298*6467f958SSadaf Ebrahimi KernelExecutor(cl_context c, cl_command_queue q, cl_kernel k, size_t g,
1299*6467f958SSadaf Ebrahimi size_t l, Ty *id, size_t is, Ty *mid, Ty *mod, cl_int *md,
1300*6467f958SSadaf Ebrahimi size_t ms, Ty *od, size_t os, size_t ts = 0)
1301*6467f958SSadaf Ebrahimi : context(c), queue(q), kernel(k), global(g), local(l), idata(id),
1302*6467f958SSadaf Ebrahimi isize(is), mapin_data(mid), mapout_data(mod), mdata(md), msize(ms),
1303*6467f958SSadaf Ebrahimi odata(od), osize(os), tsize(ts)
1304*6467f958SSadaf Ebrahimi {
1305*6467f958SSadaf Ebrahimi has_status = false;
1306*6467f958SSadaf Ebrahimi run_failed = false;
1307*6467f958SSadaf Ebrahimi }
1308*6467f958SSadaf Ebrahimi cl_context context;
1309*6467f958SSadaf Ebrahimi cl_command_queue queue;
1310*6467f958SSadaf Ebrahimi cl_kernel kernel;
1311*6467f958SSadaf Ebrahimi size_t global;
1312*6467f958SSadaf Ebrahimi size_t local;
1313*6467f958SSadaf Ebrahimi Ty *idata;
1314*6467f958SSadaf Ebrahimi size_t isize;
1315*6467f958SSadaf Ebrahimi Ty *mapin_data;
1316*6467f958SSadaf Ebrahimi Ty *mapout_data;
1317*6467f958SSadaf Ebrahimi cl_int *mdata;
1318*6467f958SSadaf Ebrahimi size_t msize;
1319*6467f958SSadaf Ebrahimi Ty *odata;
1320*6467f958SSadaf Ebrahimi size_t osize;
1321*6467f958SSadaf Ebrahimi size_t tsize;
1322*6467f958SSadaf Ebrahimi bool run_failed;
1323*6467f958SSadaf Ebrahimi
1324*6467f958SSadaf Ebrahimi private:
1325*6467f958SSadaf Ebrahimi bool has_status;
1326*6467f958SSadaf Ebrahimi test_status status;
1327*6467f958SSadaf Ebrahimi
1328*6467f958SSadaf Ebrahimi public:
1329*6467f958SSadaf Ebrahimi // Run a test kernel to compute the result of a built-in on an input
1330*6467f958SSadaf Ebrahimi int run()
1331*6467f958SSadaf Ebrahimi {
1332*6467f958SSadaf Ebrahimi clMemWrapper in;
1333*6467f958SSadaf Ebrahimi clMemWrapper xy;
1334*6467f958SSadaf Ebrahimi clMemWrapper out;
1335*6467f958SSadaf Ebrahimi clMemWrapper tmp;
1336*6467f958SSadaf Ebrahimi int error;
1337*6467f958SSadaf Ebrahimi
1338*6467f958SSadaf Ebrahimi in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
1339*6467f958SSadaf Ebrahimi test_error(error, "clCreateBuffer failed");
1340*6467f958SSadaf Ebrahimi
1341*6467f958SSadaf Ebrahimi xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
1342*6467f958SSadaf Ebrahimi test_error(error, "clCreateBuffer failed");
1343*6467f958SSadaf Ebrahimi
1344*6467f958SSadaf Ebrahimi out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
1345*6467f958SSadaf Ebrahimi test_error(error, "clCreateBuffer failed");
1346*6467f958SSadaf Ebrahimi
1347*6467f958SSadaf Ebrahimi if (tsize)
1348*6467f958SSadaf Ebrahimi {
1349*6467f958SSadaf Ebrahimi tmp = clCreateBuffer(context,
1350*6467f958SSadaf Ebrahimi CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
1351*6467f958SSadaf Ebrahimi tsize, NULL, &error);
1352*6467f958SSadaf Ebrahimi test_error(error, "clCreateBuffer failed");
1353*6467f958SSadaf Ebrahimi }
1354*6467f958SSadaf Ebrahimi
1355*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
1356*6467f958SSadaf Ebrahimi test_error(error, "clSetKernelArg failed");
1357*6467f958SSadaf Ebrahimi
1358*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
1359*6467f958SSadaf Ebrahimi test_error(error, "clSetKernelArg failed");
1360*6467f958SSadaf Ebrahimi
1361*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
1362*6467f958SSadaf Ebrahimi test_error(error, "clSetKernelArg failed");
1363*6467f958SSadaf Ebrahimi
1364*6467f958SSadaf Ebrahimi if (tsize)
1365*6467f958SSadaf Ebrahimi {
1366*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
1367*6467f958SSadaf Ebrahimi test_error(error, "clSetKernelArg failed");
1368*6467f958SSadaf Ebrahimi }
1369*6467f958SSadaf Ebrahimi
1370*6467f958SSadaf Ebrahimi error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0,
1371*6467f958SSadaf Ebrahimi NULL, NULL);
1372*6467f958SSadaf Ebrahimi test_error(error, "clEnqueueWriteBuffer failed");
1373*6467f958SSadaf Ebrahimi
1374*6467f958SSadaf Ebrahimi error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0,
1375*6467f958SSadaf Ebrahimi NULL, NULL);
1376*6467f958SSadaf Ebrahimi test_error(error, "clEnqueueWriteBuffer failed");
1377*6467f958SSadaf Ebrahimi error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local,
1378*6467f958SSadaf Ebrahimi 0, NULL, NULL);
1379*6467f958SSadaf Ebrahimi test_error(error, "clEnqueueNDRangeKernel failed");
1380*6467f958SSadaf Ebrahimi
1381*6467f958SSadaf Ebrahimi error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0,
1382*6467f958SSadaf Ebrahimi NULL, NULL);
1383*6467f958SSadaf Ebrahimi test_error(error, "clEnqueueReadBuffer failed");
1384*6467f958SSadaf Ebrahimi
1385*6467f958SSadaf Ebrahimi error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0,
1386*6467f958SSadaf Ebrahimi NULL, NULL);
1387*6467f958SSadaf Ebrahimi test_error(error, "clEnqueueReadBuffer failed");
1388*6467f958SSadaf Ebrahimi
1389*6467f958SSadaf Ebrahimi error = clFinish(queue);
1390*6467f958SSadaf Ebrahimi test_error(error, "clFinish failed");
1391*6467f958SSadaf Ebrahimi
1392*6467f958SSadaf Ebrahimi return error;
1393*6467f958SSadaf Ebrahimi }
1394*6467f958SSadaf Ebrahimi
1395*6467f958SSadaf Ebrahimi private:
1396*6467f958SSadaf Ebrahimi test_status
1397*6467f958SSadaf Ebrahimi run_and_check_with_cluster_size(const WorkGroupParams &test_params)
1398*6467f958SSadaf Ebrahimi {
1399*6467f958SSadaf Ebrahimi cl_int error = run();
1400*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
1401*6467f958SSadaf Ebrahimi {
1402*6467f958SSadaf Ebrahimi print_error(error, "Failed to run subgroup test kernel");
1403*6467f958SSadaf Ebrahimi status = TEST_FAIL;
1404*6467f958SSadaf Ebrahimi run_failed = true;
1405*6467f958SSadaf Ebrahimi return status;
1406*6467f958SSadaf Ebrahimi }
1407*6467f958SSadaf Ebrahimi
1408*6467f958SSadaf Ebrahimi test_status tmp_status =
1409*6467f958SSadaf Ebrahimi Fns::chk(idata, odata, mapin_data, mapout_data, mdata, test_params);
1410*6467f958SSadaf Ebrahimi
1411*6467f958SSadaf Ebrahimi if (!has_status || tmp_status == TEST_FAIL
1412*6467f958SSadaf Ebrahimi || (tmp_status == TEST_PASS && status != TEST_FAIL))
1413*6467f958SSadaf Ebrahimi {
1414*6467f958SSadaf Ebrahimi status = tmp_status;
1415*6467f958SSadaf Ebrahimi has_status = true;
1416*6467f958SSadaf Ebrahimi }
1417*6467f958SSadaf Ebrahimi
1418*6467f958SSadaf Ebrahimi return status;
1419*6467f958SSadaf Ebrahimi }
1420*6467f958SSadaf Ebrahimi
1421*6467f958SSadaf Ebrahimi public:
1422*6467f958SSadaf Ebrahimi test_status run_and_check(WorkGroupParams &test_params)
1423*6467f958SSadaf Ebrahimi {
1424*6467f958SSadaf Ebrahimi test_status tmp_status = TEST_SKIPPED_ITSELF;
1425*6467f958SSadaf Ebrahimi
1426*6467f958SSadaf Ebrahimi if (test_params.cluster_size_arg != -1)
1427*6467f958SSadaf Ebrahimi {
1428*6467f958SSadaf Ebrahimi for (cl_uint cluster_size = 1;
1429*6467f958SSadaf Ebrahimi cluster_size <= test_params.subgroup_size; cluster_size *= 2)
1430*6467f958SSadaf Ebrahimi {
1431*6467f958SSadaf Ebrahimi test_params.cluster_size = cluster_size;
1432*6467f958SSadaf Ebrahimi cl_int error =
1433*6467f958SSadaf Ebrahimi clSetKernelArg(kernel, test_params.cluster_size_arg,
1434*6467f958SSadaf Ebrahimi sizeof(cl_uint), &cluster_size);
1435*6467f958SSadaf Ebrahimi test_error_fail(error, "Unable to set cluster size");
1436*6467f958SSadaf Ebrahimi
1437*6467f958SSadaf Ebrahimi tmp_status = run_and_check_with_cluster_size(test_params);
1438*6467f958SSadaf Ebrahimi
1439*6467f958SSadaf Ebrahimi if (tmp_status == TEST_FAIL) break;
1440*6467f958SSadaf Ebrahimi }
1441*6467f958SSadaf Ebrahimi }
1442*6467f958SSadaf Ebrahimi else
1443*6467f958SSadaf Ebrahimi {
1444*6467f958SSadaf Ebrahimi tmp_status = run_and_check_with_cluster_size(test_params);
1445*6467f958SSadaf Ebrahimi }
1446*6467f958SSadaf Ebrahimi
1447*6467f958SSadaf Ebrahimi return tmp_status;
1448*6467f958SSadaf Ebrahimi }
1449*6467f958SSadaf Ebrahimi };
1450*6467f958SSadaf Ebrahimi
1451*6467f958SSadaf Ebrahimi // Driver for testing a single built in function
1452*6467f958SSadaf Ebrahimi template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
1453*6467f958SSadaf Ebrahimi {
1454*6467f958SSadaf Ebrahimi static test_status run(cl_device_id device, cl_context context,
1455*6467f958SSadaf Ebrahimi cl_command_queue queue, int num_elements,
1456*6467f958SSadaf Ebrahimi const char *kname, const char *src,
1457*6467f958SSadaf Ebrahimi WorkGroupParams test_params)
1458*6467f958SSadaf Ebrahimi {
1459*6467f958SSadaf Ebrahimi size_t tmp;
1460*6467f958SSadaf Ebrahimi cl_int error;
1461*6467f958SSadaf Ebrahimi size_t subgroup_size, num_subgroups;
1462*6467f958SSadaf Ebrahimi size_t global = test_params.global_workgroup_size;
1463*6467f958SSadaf Ebrahimi size_t local = test_params.local_workgroup_size;
1464*6467f958SSadaf Ebrahimi clProgramWrapper program;
1465*6467f958SSadaf Ebrahimi clKernelWrapper kernel;
1466*6467f958SSadaf Ebrahimi cl_platform_id platform;
1467*6467f958SSadaf Ebrahimi std::vector<cl_int> sgmap;
1468*6467f958SSadaf Ebrahimi sgmap.resize(4 * global);
1469*6467f958SSadaf Ebrahimi std::vector<Ty> mapin;
1470*6467f958SSadaf Ebrahimi mapin.resize(local);
1471*6467f958SSadaf Ebrahimi std::vector<Ty> mapout;
1472*6467f958SSadaf Ebrahimi mapout.resize(local);
1473*6467f958SSadaf Ebrahimi std::stringstream kernel_sstr;
1474*6467f958SSadaf Ebrahimi
1475*6467f958SSadaf Ebrahimi Fns::log_test(test_params, "");
1476*6467f958SSadaf Ebrahimi
1477*6467f958SSadaf Ebrahimi kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
1478*6467f958SSadaf Ebrahimi kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
1479*6467f958SSadaf Ebrahimi // Make sure a test of type Ty is supported by the device
1480*6467f958SSadaf Ebrahimi if (!TypeManager<Ty>::type_supported(device))
1481*6467f958SSadaf Ebrahimi {
1482*6467f958SSadaf Ebrahimi log_info("Data type not supported : %s\n", TypeManager<Ty>::name());
1483*6467f958SSadaf Ebrahimi return TEST_SKIPPED_ITSELF;
1484*6467f958SSadaf Ebrahimi }
1485*6467f958SSadaf Ebrahimi
1486*6467f958SSadaf Ebrahimi if (strstr(TypeManager<Ty>::name(), "double"))
1487*6467f958SSadaf Ebrahimi {
1488*6467f958SSadaf Ebrahimi kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
1489*6467f958SSadaf Ebrahimi }
1490*6467f958SSadaf Ebrahimi else if (strstr(TypeManager<Ty>::name(), "half"))
1491*6467f958SSadaf Ebrahimi {
1492*6467f958SSadaf Ebrahimi kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
1493*6467f958SSadaf Ebrahimi }
1494*6467f958SSadaf Ebrahimi
1495*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
1496*6467f958SSadaf Ebrahimi (void *)&platform, NULL);
1497*6467f958SSadaf Ebrahimi test_error_fail(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
1498*6467f958SSadaf Ebrahimi if (test_params.use_core_subgroups)
1499*6467f958SSadaf Ebrahimi {
1500*6467f958SSadaf Ebrahimi kernel_sstr
1501*6467f958SSadaf Ebrahimi << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
1502*6467f958SSadaf Ebrahimi }
1503*6467f958SSadaf Ebrahimi kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
1504*6467f958SSadaf Ebrahimi "M[I].y = get_sub_group_id();\n";
1505*6467f958SSadaf Ebrahimi kernel_sstr << TypeManager<Ty>::add_typedef();
1506*6467f958SSadaf Ebrahimi kernel_sstr << src;
1507*6467f958SSadaf Ebrahimi const std::string &kernel_str = kernel_sstr.str();
1508*6467f958SSadaf Ebrahimi const char *kernel_src = kernel_str.c_str();
1509*6467f958SSadaf Ebrahimi
1510*6467f958SSadaf Ebrahimi error = create_single_kernel_helper(context, &program, &kernel, 1,
1511*6467f958SSadaf Ebrahimi &kernel_src, kname);
1512*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS) return TEST_FAIL;
1513*6467f958SSadaf Ebrahimi
1514*6467f958SSadaf Ebrahimi // Determine some local dimensions to use for the test.
1515*6467f958SSadaf Ebrahimi error = get_max_common_work_group_size(
1516*6467f958SSadaf Ebrahimi context, kernel, test_params.global_workgroup_size, &local);
1517*6467f958SSadaf Ebrahimi test_error_fail(error, "get_max_common_work_group_size failed");
1518*6467f958SSadaf Ebrahimi
1519*6467f958SSadaf Ebrahimi // Limit it a bit so we have muliple work groups
1520*6467f958SSadaf Ebrahimi // Ideally this will still be large enough to give us multiple
1521*6467f958SSadaf Ebrahimi if (local > test_params.local_workgroup_size)
1522*6467f958SSadaf Ebrahimi local = test_params.local_workgroup_size;
1523*6467f958SSadaf Ebrahimi
1524*6467f958SSadaf Ebrahimi
1525*6467f958SSadaf Ebrahimi // Get the sub group info
1526*6467f958SSadaf Ebrahimi subgroupsAPI subgroupsApiSet(platform, test_params.use_core_subgroups);
1527*6467f958SSadaf Ebrahimi clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
1528*6467f958SSadaf Ebrahimi subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
1529*6467f958SSadaf Ebrahimi if (clGetKernelSubGroupInfo_ptr == NULL)
1530*6467f958SSadaf Ebrahimi {
1531*6467f958SSadaf Ebrahimi log_error("ERROR: %s function not available\n",
1532*6467f958SSadaf Ebrahimi subgroupsApiSet.clGetKernelSubGroupInfo_name);
1533*6467f958SSadaf Ebrahimi return TEST_FAIL;
1534*6467f958SSadaf Ebrahimi }
1535*6467f958SSadaf Ebrahimi error = clGetKernelSubGroupInfo_ptr(
1536*6467f958SSadaf Ebrahimi kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
1537*6467f958SSadaf Ebrahimi sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
1538*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
1539*6467f958SSadaf Ebrahimi {
1540*6467f958SSadaf Ebrahimi log_error("ERROR: %s function error for "
1541*6467f958SSadaf Ebrahimi "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n",
1542*6467f958SSadaf Ebrahimi subgroupsApiSet.clGetKernelSubGroupInfo_name);
1543*6467f958SSadaf Ebrahimi return TEST_FAIL;
1544*6467f958SSadaf Ebrahimi }
1545*6467f958SSadaf Ebrahimi
1546*6467f958SSadaf Ebrahimi subgroup_size = tmp;
1547*6467f958SSadaf Ebrahimi
1548*6467f958SSadaf Ebrahimi error = clGetKernelSubGroupInfo_ptr(
1549*6467f958SSadaf Ebrahimi kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
1550*6467f958SSadaf Ebrahimi sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
1551*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
1552*6467f958SSadaf Ebrahimi {
1553*6467f958SSadaf Ebrahimi log_error("ERROR: %s function error for "
1554*6467f958SSadaf Ebrahimi "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n",
1555*6467f958SSadaf Ebrahimi subgroupsApiSet.clGetKernelSubGroupInfo_name);
1556*6467f958SSadaf Ebrahimi return TEST_FAIL;
1557*6467f958SSadaf Ebrahimi }
1558*6467f958SSadaf Ebrahimi
1559*6467f958SSadaf Ebrahimi num_subgroups = tmp;
1560*6467f958SSadaf Ebrahimi // Make sure the number of sub groups is what we expect
1561*6467f958SSadaf Ebrahimi if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
1562*6467f958SSadaf Ebrahimi {
1563*6467f958SSadaf Ebrahimi log_error("ERROR: unexpected number of subgroups (%zu) returned\n",
1564*6467f958SSadaf Ebrahimi num_subgroups);
1565*6467f958SSadaf Ebrahimi return TEST_FAIL;
1566*6467f958SSadaf Ebrahimi }
1567*6467f958SSadaf Ebrahimi
1568*6467f958SSadaf Ebrahimi std::vector<Ty> idata;
1569*6467f958SSadaf Ebrahimi std::vector<Ty> odata;
1570*6467f958SSadaf Ebrahimi size_t input_array_size = global;
1571*6467f958SSadaf Ebrahimi size_t output_array_size = global;
1572*6467f958SSadaf Ebrahimi size_t dynscl = test_params.dynsc;
1573*6467f958SSadaf Ebrahimi
1574*6467f958SSadaf Ebrahimi if (dynscl != 0)
1575*6467f958SSadaf Ebrahimi {
1576*6467f958SSadaf Ebrahimi input_array_size = global / local * num_subgroups * dynscl;
1577*6467f958SSadaf Ebrahimi output_array_size = global / local * dynscl;
1578*6467f958SSadaf Ebrahimi }
1579*6467f958SSadaf Ebrahimi
1580*6467f958SSadaf Ebrahimi idata.resize(input_array_size);
1581*6467f958SSadaf Ebrahimi odata.resize(output_array_size);
1582*6467f958SSadaf Ebrahimi
1583*6467f958SSadaf Ebrahimi if (test_params.divergence_mask_arg != -1)
1584*6467f958SSadaf Ebrahimi {
1585*6467f958SSadaf Ebrahimi cl_uint4 mask_vector;
1586*6467f958SSadaf Ebrahimi mask_vector.x = 0xffffffffU;
1587*6467f958SSadaf Ebrahimi mask_vector.y = 0xffffffffU;
1588*6467f958SSadaf Ebrahimi mask_vector.z = 0xffffffffU;
1589*6467f958SSadaf Ebrahimi mask_vector.w = 0xffffffffU;
1590*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, test_params.divergence_mask_arg,
1591*6467f958SSadaf Ebrahimi sizeof(cl_uint4), &mask_vector);
1592*6467f958SSadaf Ebrahimi test_error_fail(error, "Unable to set divergence mask argument");
1593*6467f958SSadaf Ebrahimi }
1594*6467f958SSadaf Ebrahimi
1595*6467f958SSadaf Ebrahimi if (test_params.cluster_size_arg != -1)
1596*6467f958SSadaf Ebrahimi {
1597*6467f958SSadaf Ebrahimi cl_uint dummy_cluster_size = 1;
1598*6467f958SSadaf Ebrahimi error = clSetKernelArg(kernel, test_params.cluster_size_arg,
1599*6467f958SSadaf Ebrahimi sizeof(cl_uint), &dummy_cluster_size);
1600*6467f958SSadaf Ebrahimi test_error_fail(error, "Unable to set dummy cluster size");
1601*6467f958SSadaf Ebrahimi }
1602*6467f958SSadaf Ebrahimi
1603*6467f958SSadaf Ebrahimi KernelExecutor<Ty, Fns> executor(
1604*6467f958SSadaf Ebrahimi context, queue, kernel, global, local, idata.data(),
1605*6467f958SSadaf Ebrahimi input_array_size * sizeof(Ty), mapin.data(), mapout.data(),
1606*6467f958SSadaf Ebrahimi sgmap.data(), global * sizeof(cl_int4), odata.data(),
1607*6467f958SSadaf Ebrahimi output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
1608*6467f958SSadaf Ebrahimi
1609*6467f958SSadaf Ebrahimi // Run the kernel once on zeroes to get the map
1610*6467f958SSadaf Ebrahimi memset(idata.data(), 0, input_array_size * sizeof(Ty));
1611*6467f958SSadaf Ebrahimi error = executor.run();
1612*6467f958SSadaf Ebrahimi test_error_fail(error, "Running kernel first time failed");
1613*6467f958SSadaf Ebrahimi
1614*6467f958SSadaf Ebrahimi // Generate the desired input for the kernel
1615*6467f958SSadaf Ebrahimi test_params.subgroup_size = subgroup_size;
1616*6467f958SSadaf Ebrahimi Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
1617*6467f958SSadaf Ebrahimi
1618*6467f958SSadaf Ebrahimi test_status status;
1619*6467f958SSadaf Ebrahimi
1620*6467f958SSadaf Ebrahimi if (test_params.divergence_mask_arg != -1)
1621*6467f958SSadaf Ebrahimi {
1622*6467f958SSadaf Ebrahimi for (auto &mask : test_params.all_work_item_masks)
1623*6467f958SSadaf Ebrahimi {
1624*6467f958SSadaf Ebrahimi test_params.work_items_mask = mask;
1625*6467f958SSadaf Ebrahimi cl_uint4 mask_vector = bs128_to_cl_uint4(mask);
1626*6467f958SSadaf Ebrahimi clSetKernelArg(kernel, test_params.divergence_mask_arg,
1627*6467f958SSadaf Ebrahimi sizeof(cl_uint4), &mask_vector);
1628*6467f958SSadaf Ebrahimi
1629*6467f958SSadaf Ebrahimi status = executor.run_and_check(test_params);
1630*6467f958SSadaf Ebrahimi
1631*6467f958SSadaf Ebrahimi if (status == TEST_FAIL) break;
1632*6467f958SSadaf Ebrahimi }
1633*6467f958SSadaf Ebrahimi }
1634*6467f958SSadaf Ebrahimi else
1635*6467f958SSadaf Ebrahimi {
1636*6467f958SSadaf Ebrahimi status = executor.run_and_check(test_params);
1637*6467f958SSadaf Ebrahimi }
1638*6467f958SSadaf Ebrahimi // Detailed failure and skip messages should be logged by
1639*6467f958SSadaf Ebrahimi // run_and_check.
1640*6467f958SSadaf Ebrahimi if (status == TEST_PASS)
1641*6467f958SSadaf Ebrahimi {
1642*6467f958SSadaf Ebrahimi Fns::log_test(test_params, " passed");
1643*6467f958SSadaf Ebrahimi }
1644*6467f958SSadaf Ebrahimi else if (!executor.run_failed && status == TEST_FAIL)
1645*6467f958SSadaf Ebrahimi {
1646*6467f958SSadaf Ebrahimi test_fail("Data verification failed\n");
1647*6467f958SSadaf Ebrahimi }
1648*6467f958SSadaf Ebrahimi return status;
1649*6467f958SSadaf Ebrahimi }
1650*6467f958SSadaf Ebrahimi };
1651*6467f958SSadaf Ebrahimi
1652*6467f958SSadaf Ebrahimi void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
1653*6467f958SSadaf Ebrahimi int subgroup_size, int &workgroup_size,
1654*6467f958SSadaf Ebrahimi int &last_subgroup_size);
1655*6467f958SSadaf Ebrahimi
1656*6467f958SSadaf Ebrahimi template <typename Ty>
1657*6467f958SSadaf Ebrahimi static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
1658*6467f958SSadaf Ebrahimi int current_sbs)
1659*6467f958SSadaf Ebrahimi {
1660*6467f958SSadaf Ebrahimi int randomize_data = (int)(genrand_int32(gMTdata) % 3);
1661*6467f958SSadaf Ebrahimi // Initialize data matrix indexed by local id and sub group id
1662*6467f958SSadaf Ebrahimi switch (randomize_data)
1663*6467f958SSadaf Ebrahimi {
1664*6467f958SSadaf Ebrahimi case 0:
1665*6467f958SSadaf Ebrahimi memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
1666*6467f958SSadaf Ebrahimi break;
1667*6467f958SSadaf Ebrahimi case 1: {
1668*6467f958SSadaf Ebrahimi memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
1669*6467f958SSadaf Ebrahimi int wi_id = (int)(genrand_int32(gMTdata) % (cl_uint)current_sbs);
1670*6467f958SSadaf Ebrahimi set_value(workgroup[wg_offset + wi_id], 41);
1671*6467f958SSadaf Ebrahimi }
1672*6467f958SSadaf Ebrahimi break;
1673*6467f958SSadaf Ebrahimi case 2:
1674*6467f958SSadaf Ebrahimi memset(&workgroup[wg_offset], 0xff, current_sbs * sizeof(Ty));
1675*6467f958SSadaf Ebrahimi break;
1676*6467f958SSadaf Ebrahimi }
1677*6467f958SSadaf Ebrahimi }
1678*6467f958SSadaf Ebrahimi
1679*6467f958SSadaf Ebrahimi struct RunTestForType
1680*6467f958SSadaf Ebrahimi {
1681*6467f958SSadaf Ebrahimi RunTestForType(cl_device_id device, cl_context context,
1682*6467f958SSadaf Ebrahimi cl_command_queue queue, int num_elements,
1683*6467f958SSadaf Ebrahimi WorkGroupParams test_params)
1684*6467f958SSadaf Ebrahimi : device_(device), context_(context), queue_(queue),
1685*6467f958SSadaf Ebrahimi num_elements_(num_elements), test_params_(test_params)
1686*6467f958SSadaf Ebrahimi {}
1687*6467f958SSadaf Ebrahimi template <typename T, typename U>
1688*6467f958SSadaf Ebrahimi int run_impl(const std::string &function_name)
1689*6467f958SSadaf Ebrahimi {
1690*6467f958SSadaf Ebrahimi int error = TEST_PASS;
1691*6467f958SSadaf Ebrahimi std::string source =
1692*6467f958SSadaf Ebrahimi std::regex_replace(test_params_.get_kernel_source(function_name),
1693*6467f958SSadaf Ebrahimi std::regex("\\%s"), function_name);
1694*6467f958SSadaf Ebrahimi std::string kernel_name = "test_" + function_name;
1695*6467f958SSadaf Ebrahimi error =
1696*6467f958SSadaf Ebrahimi test<T, U>::run(device_, context_, queue_, num_elements_,
1697*6467f958SSadaf Ebrahimi kernel_name.c_str(), source.c_str(), test_params_);
1698*6467f958SSadaf Ebrahimi
1699*6467f958SSadaf Ebrahimi // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be
1700*6467f958SSadaf Ebrahimi // reported as having been skipped even if some tests within it
1701*6467f958SSadaf Ebrahimi // passed, as the status codes are erroneously ORed together:
1702*6467f958SSadaf Ebrahimi return error == TEST_FAIL ? TEST_FAIL : TEST_PASS;
1703*6467f958SSadaf Ebrahimi }
1704*6467f958SSadaf Ebrahimi
1705*6467f958SSadaf Ebrahimi private:
1706*6467f958SSadaf Ebrahimi cl_device_id device_;
1707*6467f958SSadaf Ebrahimi cl_context context_;
1708*6467f958SSadaf Ebrahimi cl_command_queue queue_;
1709*6467f958SSadaf Ebrahimi int num_elements_;
1710*6467f958SSadaf Ebrahimi WorkGroupParams test_params_;
1711*6467f958SSadaf Ebrahimi };
1712*6467f958SSadaf Ebrahimi
1713*6467f958SSadaf Ebrahimi #endif
1714