1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 #include "harness/rounding_mode.h"
18 #include "harness/stringHelpers.h"
19
20 #include <CL/cl_half.h>
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27
28 #include <algorithm>
29 #include <functional>
30 #include <map>
31 #include <string>
32 #include <vector>
33
34 #include "procs.h"
35
36 static const char *fp_kernel_code = R"(
37 %s
38 __kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst)
39 {
40 int tid = get_global_id(0);
41
42 dst[tid] = srcA[tid] OP srcB[tid];
43 })";
44
45 extern cl_half_rounding_mode halfRoundingMode;
46
47 #define HFF(num) cl_half_from_float(num, halfRoundingMode)
48 #define HTF(num) cl_half_to_float(num)
49
toDouble(T val)50 template <typename T> double toDouble(T val)
51 {
52 if (std::is_same<cl_half, T>::value)
53 return HTF(val);
54 else
55 return val;
56 }
57
isHalfNan(cl_half v)58 bool isHalfNan(cl_half v)
59 {
60 // Extract FP16 exponent and mantissa
61 uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
62 uint16_t h_mant = v & 0x3FF;
63
64 // NaN test
65 return (h_exp == 0x1F && h_mant != 0);
66 }
67
half_plus(cl_half a,cl_half b)68 cl_half half_plus(cl_half a, cl_half b)
69 {
70 return HFF(std::plus<float>()(HTF(a), HTF(b)));
71 }
72
half_minus(cl_half a,cl_half b)73 cl_half half_minus(cl_half a, cl_half b)
74 {
75 return HFF(std::minus<float>()(HTF(a), HTF(b)));
76 }
77
half_mult(cl_half a,cl_half b)78 cl_half half_mult(cl_half a, cl_half b)
79 {
80 return HFF(std::multiplies<float>()(HTF(a), HTF(b)));
81 }
82
83 template <typename T> struct TestDef
84 {
85 const char op;
86 std::function<T(T, T)> ref;
87 std::string type_str;
88 size_t vec_size;
89 };
90
91 template <typename T>
verify_fp(std::vector<T> (& input)[2],std::vector<T> & output,const TestDef<T> & test)92 int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
93 const TestDef<T> &test)
94 {
95 auto &inA = input[0];
96 auto &inB = input[1];
97 for (size_t i = 0; i < output.size(); i++)
98 {
99 bool nan_test = false;
100
101 T r = test.ref(inA[i], inB[i]);
102
103 if (std::is_same<T, cl_half>::value)
104 nan_test = !(isHalfNan(r) && isHalfNan(output[i]));
105
106 if (r != output[i] && nan_test)
107 {
108 log_error("FP math test for type: %s, vec size: %zu, failed at "
109 "index %zu, %a '%c' %a, expected %a, get %a\n",
110 test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]),
111 test.op, toDouble(inB[i]), toDouble(r),
112 toDouble(output[i]));
113 return -1;
114 }
115 }
116
117 return 0;
118 }
119
generate_random_inputs(std::vector<T> (& input)[2])120 template <typename T> void generate_random_inputs(std::vector<T> (&input)[2])
121 {
122 RandomSeed seed(gRandomSeed);
123
124 if (std::is_same<T, float>::value)
125 {
126 auto random_generator = [&seed]() {
127 return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
128 MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed);
129 };
130 for (auto &v : input)
131 std::generate(v.begin(), v.end(), random_generator);
132 }
133 else if (std::is_same<T, double>::value)
134 {
135 auto random_generator = [&seed]() {
136 return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63),
137 MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63),
138 seed);
139 };
140 for (auto &v : input)
141 std::generate(v.begin(), v.end(), random_generator);
142 }
143 else
144 {
145 auto random_generator = [&seed]() {
146 return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8),
147 MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8),
148 seed));
149 };
150 for (auto &v : input)
151 std::generate(v.begin(), v.end(), random_generator);
152 }
153 }
154
155 struct TypesIterator
156 {
157 using TypeIter = std::tuple<cl_float, cl_half, cl_double>;
158
TypesIteratorTypesIterator159 TypesIterator(cl_device_id deviceID, cl_context context,
160 cl_command_queue queue, int num_elems)
161 : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0),
162 num_elements(num_elems)
163 {
164 // typeid().name one day
165 type2name[sizeof(cl_half)] = "half";
166 type2name[sizeof(cl_float)] = "float";
167 type2name[sizeof(cl_double)] = "double";
168
169 fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
170 fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
171
172 fpConfigFloat = get_default_rounding_mode(deviceID);
173
174 if (fp16Support)
175 fpConfigHalf =
176 get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG);
177
178 for_each_elem(it);
179 }
180
test_fpmathTypesIterator181 template <typename T> int test_fpmath(TestDef<T> &test)
182 {
183 constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 };
184 cl_int err = CL_SUCCESS;
185
186 std::ostringstream sstr;
187 if (std::is_same<T, double>::value)
188 sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
189
190 if (std::is_same<T, cl_half>::value)
191 sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
192
193 std::string program_source =
194 str_sprintf(std::string(fp_kernel_code), sstr.str().c_str());
195
196 for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++)
197 {
198 test.vec_size = vecSizes[i];
199
200 std::ostringstream vecNameStr;
201 vecNameStr << test.type_str;
202 if (test.vec_size != 1) vecNameStr << test.vec_size;
203
204 clMemWrapper streams[3];
205 clProgramWrapper program;
206 clKernelWrapper kernel;
207
208 size_t length = sizeof(T) * num_elements * test.vec_size;
209
210 bool isRTZ = false;
211 RoundingMode oldMode = kDefaultRoundingMode;
212
213
214 // If we only support rtz mode
215 if (std::is_same<T, cl_half>::value)
216 {
217 if (CL_FP_ROUND_TO_ZERO == fpConfigHalf)
218 {
219 isRTZ = true;
220 oldMode = get_round();
221 }
222 }
223 else if (std::is_same<T, float>::value)
224 {
225 if (CL_FP_ROUND_TO_ZERO == fpConfigFloat)
226 {
227 isRTZ = true;
228 oldMode = get_round();
229 }
230 }
231
232 std::vector<T> inputs[]{
233 std::vector<T>(test.vec_size * num_elements),
234 std::vector<T>(test.vec_size * num_elements)
235 };
236 std::vector<T> output =
237 std::vector<T>(test.vec_size * num_elements);
238
239 generate_random_inputs<T>(inputs);
240
241 for (size_t i = 0; i < ARRAY_SIZE(streams); i++)
242 {
243 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length,
244 NULL, &err);
245 test_error(err, "clCreateBuffer failed.");
246 }
247 for (size_t i = 0; i < ARRAY_SIZE(inputs); i++)
248 {
249 err =
250 clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length,
251 inputs[i].data(), 0, NULL, NULL);
252 test_error(err, "clEnqueueWriteBuffer failed.");
253 }
254
255 std::string build_options = "-DTYPE=";
256 build_options.append(vecNameStr.str())
257 .append(" -DOP=")
258 .append(1, test.op);
259
260 const char *ptr = program_source.c_str();
261 err =
262 create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
263 "test_fp", build_options.c_str());
264
265 test_error(err, "create_single_kernel_helper failed");
266
267 for (size_t i = 0; i < ARRAY_SIZE(streams); i++)
268 {
269 err =
270 clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
271 test_error(err, "clSetKernelArgs failed.");
272 }
273
274 size_t threads[] = { static_cast<size_t>(num_elements) };
275 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL,
276 0, NULL, NULL);
277 test_error(err, "clEnqueueNDRangeKernel failed.");
278
279 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length,
280 output.data(), 0, NULL, NULL);
281 test_error(err, "clEnqueueReadBuffer failed.");
282
283 if (isRTZ) set_round(kRoundTowardZero, kfloat);
284
285 err = verify_fp(inputs, output, test);
286
287 if (isRTZ) set_round(oldMode, kfloat);
288
289 test_error(err, "test verification failed");
290 log_info("FP '%c' '%s' test passed\n", test.op,
291 vecNameStr.str().c_str());
292 }
293
294 return err;
295 }
296
test_fpmath_commonTypesIterator297 template <typename T> int test_fpmath_common()
298 {
299 int err = TEST_PASS;
300 if (std::is_same<cl_half, T>::value)
301 {
302 TestDef<T> tests[] = { { '+', half_plus, type2name[sizeof(T)] },
303 { '-', half_minus, type2name[sizeof(T)] },
304 { '*', half_mult, type2name[sizeof(T)] } };
305 for (auto &test : tests) err |= test_fpmath<T>(test);
306 }
307 else
308 {
309 TestDef<T> tests[] = {
310 { '+', std::plus<T>(), type2name[sizeof(T)] },
311 { '-', std::minus<T>(), type2name[sizeof(T)] },
312 { '*', std::multiplies<T>(), type2name[sizeof(T)] }
313 };
314 for (auto &test : tests) err |= test_fpmath<T>(test);
315 }
316
317 return err;
318 }
319
skip_typeTypesIterator320 template <typename T> bool skip_type()
321 {
322 if (std::is_same<double, T>::value && !fp64Support)
323 return true;
324 else if (std::is_same<cl_half, T>::value && !fp16Support)
325 return true;
326 return false;
327 }
328
329 template <std::size_t Cnt = 0, typename Type>
iterate_typeTypesIterator330 void iterate_type(const Type &t)
331 {
332 bool doTest = !skip_type<Type>();
333
334 if (doTest)
335 {
336 if (test_fpmath_common<Type>())
337 {
338 throw std::runtime_error("test_fpmath_common failed\n");
339 }
340 }
341 }
342
343 template <std::size_t Cnt = 0, typename... Tp>
344 inline typename std::enable_if<Cnt == sizeof...(Tp), void>::type
for_each_elemTypesIterator345 for_each_elem(
346 const std::tuple<Tp...> &) // Unused arguments are given no names.
347 {}
348
349 template <std::size_t Cnt = 0, typename... Tp>
350 inline typename std::enable_if < Cnt<sizeof...(Tp), void>::type
351 for_each_elem(const std::tuple<Tp...> &t)
352 {
353 iterate_type<Cnt>(std::get<Cnt>(t));
354 for_each_elem<Cnt + 1, Tp...>(t);
355 }
356
357 protected:
358 TypeIter it;
359
360 cl_context context;
361 cl_command_queue queue;
362
363 cl_device_fp_config fpConfigHalf;
364 cl_device_fp_config fpConfigFloat;
365
366 bool fp16Support;
367 bool fp64Support;
368
369 int num_elements;
370 std::map<size_t, std::string> type2name;
371 };
372
test_fpmath(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)373 int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue,
374 int num_elements)
375 {
376 try
377 {
378 TypesIterator(device, context, queue, num_elements);
379 } catch (const std::runtime_error &e)
380 {
381 log_error("%s", e.what());
382 return TEST_FAIL;
383 }
384
385 return TEST_PASS;
386 }
387