1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20
21 #include "harness/stringHelpers.h"
22
23 #include "procs.h"
24 #include "test_base.h"
25
26
27 const char *mix_fn_code_pattern =
28 "%s\n" /* optional pragma */
29 "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s "
30 "*a, __global %s%s *dst)\n"
31 "{\n"
32 " int tid = get_global_id(0);\n"
33 " dst[tid] = mix(x[tid], y[tid], a[tid]);\n"
34 "}\n";
35
36 const char *mix_fn_code_pattern_v3 =
37 "%s\n" /* optional pragma */
38 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
39 "__global %s *dst)\n"
40 "{\n"
41 " int tid = get_global_id(0);\n"
42 "\n"
43 " vstore3(mix(vload3(tid, x), vload3(tid, y), vload3(tid, a)), tid, "
44 "dst);\n"
45 "}\n";
46
47 const char *mix_fn_code_pattern_v3_scalar =
48 "%s\n" /* optional pragma */
49 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
50 "__global %s *dst)\n"
51 "{\n"
52 " int tid = get_global_id(0);\n"
53 "\n"
54 " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n"
55 "}\n";
56
57 #define MAX_ERR 1e-3
58
59 namespace {
60
61 template <typename T>
verify_mix(const T * const inptrX,const T * const inptrY,const T * const inptrA,const T * const outptr,const int n,const int veclen,const bool vecParam)62 int verify_mix(const T *const inptrX, const T *const inptrY,
63 const T *const inptrA, const T *const outptr, const int n,
64 const int veclen, const bool vecParam)
65 {
66 double r, o;
67 float delta = 0.f, max_delta = 0.f;
68 int i;
69
70 if (vecParam)
71 {
72 for (i = 0; i < n * veclen; i++)
73 {
74 r = conv_to_dbl(inptrX[i])
75 + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i]))
76 * conv_to_dbl(inptrA[i]));
77
78 o = conv_to_dbl(outptr[i]);
79 delta = fabs(double(r - o)) / r;
80 if (!std::is_same<T, half>::value)
81 {
82 if (delta > MAX_ERR)
83 {
84 log_error("%d) verification error: mix(%a, %a, %a) = *%a "
85 "vs. %a\n",
86 i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]);
87 return -1;
88 }
89 }
90 else
91 {
92 max_delta = std::max(max_delta, delta);
93 }
94 }
95 }
96 else
97 {
98 for (int i = 0; i < n; ++i)
99 {
100 int ii = i / veclen;
101 int vi = i * veclen;
102 for (int j = 0; j < veclen; ++j, ++vi)
103 {
104 r = conv_to_dbl(inptrX[vi])
105 + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi]))
106 * conv_to_dbl(inptrA[i]));
107 delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r;
108 if (!std::is_same<T, half>::value)
109 {
110 if (delta > MAX_ERR)
111 {
112 log_error(
113 "{%d, element %d}) verification error: mix(%a, "
114 "%a, %a) = *%a vs. %a\n",
115 ii, j, inptrX[vi], inptrY[vi], inptrA[i], r,
116 outptr[vi]);
117 return -1;
118 }
119 }
120 else
121 {
122 max_delta = std::max(max_delta, delta);
123 }
124 }
125 }
126 }
127
128 // due to the fact that accuracy of mix for cl_khr_fp16 is implementation
129 // defined this test only reports maximum error without testing maximum
130 // error threshold
131 if (std::is_same<T, half>::value)
132 log_error("mix half verification result, max delta: %a\n", max_delta);
133
134 return 0;
135 }
136 } // namespace
137
138 template <typename T>
test_mix_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,bool vecParam)139 int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
140 int n_elems, bool vecParam)
141 {
142 clMemWrapper streams[4];
143 std::vector<T> input_ptr[3], output_ptr;
144
145 std::vector<clProgramWrapper> programs;
146 std::vector<clKernelWrapper> kernels;
147
148 int err, i;
149 MTdataHolder d(gRandomSeed);
150
151 assert(BaseFunctionTest::type2name.find(sizeof(T))
152 != BaseFunctionTest::type2name.end());
153 auto tname = BaseFunctionTest::type2name[sizeof(T)];
154
155 programs.resize(kTotalVecCount);
156 kernels.resize(kTotalVecCount);
157
158 int num_elements = n_elems * (1 << (kTotalVecCount - 1));
159
160
161 for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
162 output_ptr.resize(num_elements);
163
164 for (i = 0; i < 4; i++)
165 {
166 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
167 sizeof(T) * num_elements, NULL, &err);
168 test_error(err, "clCreateBuffer failed");
169 }
170
171 std::string pragma_str;
172 if (std::is_same<T, double>::value)
173 {
174 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
175 }
176
177 if (std::is_same<T, half>::value)
178 {
179 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
180 for (i = 0; i < num_elements; i++)
181 {
182 input_ptr[0][i] = conv_to_half((float)genrand_real1(d));
183 input_ptr[1][i] = conv_to_half((float)genrand_real1(d));
184 input_ptr[2][i] = conv_to_half((float)genrand_real1(d));
185 }
186 }
187 else
188 {
189 for (i = 0; i < num_elements; i++)
190 {
191 input_ptr[0][i] = (T)genrand_real1(d);
192 input_ptr[1][i] = (T)genrand_real1(d);
193 input_ptr[2][i] = (T)genrand_real1(d);
194 }
195 }
196
197 for (i = 0; i < 3; i++)
198 {
199 err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
200 sizeof(T) * num_elements,
201 &input_ptr[i].front(), 0, NULL, NULL);
202 test_error(err, "Unable to write input buffer");
203 }
204
205 char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
206 for (i = 0; i < kTotalVecCount; i++)
207 {
208 std::string kernelSource;
209 if (i >= kVectorSizeCount)
210 {
211 if (vecParam)
212 {
213 std::string str = mix_fn_code_pattern_v3;
214 kernelSource =
215 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
216 tname.c_str(), tname.c_str(), tname.c_str());
217 }
218 else
219 {
220 std::string str = mix_fn_code_pattern_v3_scalar;
221 kernelSource =
222 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
223 tname.c_str(), tname.c_str(), tname.c_str());
224 }
225 }
226 else
227 {
228 // regular path
229 std::string str = mix_fn_code_pattern;
230 kernelSource =
231 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
232 vecSizeNames[i], tname.c_str(), vecSizeNames[i],
233 tname.c_str(), vecParam ? vecSizeNames[i] : "",
234 tname.c_str(), vecSizeNames[i]);
235 }
236 const char *programPtr = kernelSource.c_str();
237 err =
238 create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
239 (const char **)&programPtr, "test_fn");
240 test_error(err, "Unable to create kernel");
241
242 for (int j = 0; j < 4; j++)
243 {
244 err =
245 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
246 test_error(err, "Unable to set kernel argument");
247 }
248
249 size_t threads = (size_t)n_elems;
250
251 err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
252 0, NULL, NULL);
253 test_error(err, "Unable to execute kernel");
254
255 err = clEnqueueReadBuffer(queue, streams[3], true, 0,
256 sizeof(T) * num_elements, &output_ptr[0], 0,
257 NULL, NULL);
258 test_error(err, "Unable to read results");
259
260 if (verify_mix(&input_ptr[0].front(), &input_ptr[1].front(),
261 &input_ptr[2].front(), &output_ptr.front(), n_elems,
262 g_arrVecSizes[i], vecParam))
263 {
264 log_error("mix %s%d%s test failed\n", tname.c_str(),
265 ((g_arrVecSizes[i])),
266 vecParam ? "" : std::string(", " + tname).c_str());
267 err = -1;
268 }
269 else
270 {
271 log_info("mix %s%d%s test passed\n", tname.c_str(),
272 ((g_arrVecSizes[i])),
273 vecParam ? "" : std::string(", " + tname).c_str());
274 err = 0;
275 }
276
277 if (err) break;
278 }
279
280 return err;
281 }
282
Run()283 cl_int MixTest::Run()
284 {
285 cl_int error = CL_SUCCESS;
286 if (is_extension_available(device, "cl_khr_fp16"))
287 {
288 error = test_mix_fn<half>(device, context, queue, num_elems, vecParam);
289 test_error(error, "MixTest::Run<cl_half> failed");
290 }
291
292 error = test_mix_fn<float>(device, context, queue, num_elems, vecParam);
293 test_error(error, "MixTest::Run<float> failed");
294
295 if (is_extension_available(device, "cl_khr_fp64"))
296 {
297 error =
298 test_mix_fn<double>(device, context, queue, num_elems, vecParam);
299 test_error(error, "MixTest::Run<double> failed");
300 }
301
302 return error;
303 }
304
test_mix(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)305 int test_mix(cl_device_id device, cl_context context, cl_command_queue queue,
306 int n_elems)
307 {
308 return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
309 true);
310 }
311
test_mixf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)312 int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue,
313 int n_elems)
314 {
315 return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
316 false);
317 }
318