1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20
21 #include "harness/stringHelpers.h"
22
23 #include "procs.h"
24 #include "test_base.h"
25
26 const char *smoothstep_fn_code_pattern =
27 "%s\n" /* optional pragma */
28 "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s "
29 "*x, __global %s%s *dst)\n"
30 "{\n"
31 " int tid = get_global_id(0);\n"
32 "\n"
33 " dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n"
34 "}\n";
35
36 const char *smoothstep_fn_code_pattern_v3 =
37 "%s\n" /* optional pragma */
38 "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
39 "__global %s *dst)\n"
40 "{\n"
41 " int tid = get_global_id(0);\n"
42 "\n"
43 " vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), "
44 "tid, dst);\n"
45 "}\n";
46
47 const char *smoothstep_fn_code_pattern_v3_scalar =
48 "%s\n" /* optional pragma */
49 "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
50 "__global %s *dst)\n"
51 "{\n"
52 " int tid = get_global_id(0);\n"
53 "\n"
54 " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n"
55 "}\n";
56
57 #define MAX_ERR (1e-5f)
58
59 namespace {
60
61 template <typename T>
verify_smoothstep(const T * const edge0,const T * const edge1,const T * const x,const T * const outptr,const int n,const int veclen,const bool vecParam)62 int verify_smoothstep(const T *const edge0, const T *const edge1,
63 const T *const x, const T *const outptr, const int n,
64 const int veclen, const bool vecParam)
65 {
66 double r, t;
67 float delta = 0, max_delta = 0;
68
69 if (vecParam)
70 {
71 for (int i = 0; i < n * veclen; i++)
72 {
73 t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i]))
74 / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
75 if (t < 0.0)
76 t = 0.0;
77 else if (t > 1.0)
78 t = 1.0;
79 r = t * t * (3.0 - 2.0 * t);
80 delta = (float)fabs(r - conv_to_dbl(outptr[i]));
81 if (!std::is_same<T, half>::value)
82 {
83 if (delta > MAX_ERR)
84 {
85 log_error(
86 "%d) verification error: smoothstep(%a, %a, %a) = "
87 "*%a vs. %a\n",
88 i, x[i], edge0[i], edge1[i], r, outptr[i]);
89 return -1;
90 }
91 }
92 else
93 max_delta = std::max(max_delta, delta);
94 }
95 }
96 else
97 {
98 for (int i = 0; i < n; ++i)
99 {
100 int ii = i / veclen;
101 int vi = i * veclen;
102 for (int j = 0; j < veclen; ++j, ++vi)
103 {
104 t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i]))
105 / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
106 if (t < 0.0)
107 t = 0.0;
108 else if (t > 1.0)
109 t = 1.0;
110 r = t * t * (3.0 - 2.0 * t);
111 delta = (float)fabs(r - conv_to_dbl(outptr[vi]));
112
113 if (!std::is_same<T, half>::value)
114 {
115 if (delta > MAX_ERR)
116 {
117 log_error("{%d, element %d}) verification error: "
118 "smoothstep(%a, %a, %a) = *%a vs. %a\n",
119 ii, j, x[vi], edge0[i], edge1[i], r,
120 outptr[vi]);
121 return -1;
122 }
123 }
124 else
125 max_delta = std::max(max_delta, delta);
126 }
127 }
128 }
129
130 // due to the fact that accuracy of smoothstep for cl_khr_fp16 is
131 // implementation defined this test only reports maximum error without
132 // testing maximum error threshold
133 if (std::is_same<T, half>::value)
134 log_error("smoothstep half verification result, max delta: %a\n",
135 max_delta);
136
137 return 0;
138 }
139
140 }
141
142 template <typename T>
test_smoothstep_fn(cl_device_id device,cl_context context,cl_command_queue queue,const int n_elems,const bool vecParam)143 int test_smoothstep_fn(cl_device_id device, cl_context context,
144 cl_command_queue queue, const int n_elems,
145 const bool vecParam)
146 {
147 clMemWrapper streams[4];
148 std::vector<T> input_ptr[3], output_ptr;
149
150 std::vector<clProgramWrapper> programs;
151 std::vector<clKernelWrapper> kernels;
152
153 int err, i;
154 MTdataHolder d = MTdataHolder(gRandomSeed);
155
156 assert(BaseFunctionTest::type2name.find(sizeof(T))
157 != BaseFunctionTest::type2name.end());
158 auto tname = BaseFunctionTest::type2name[sizeof(T)];
159
160 programs.resize(kTotalVecCount);
161 kernels.resize(kTotalVecCount);
162
163 int num_elements = n_elems * (1 << (kTotalVecCount - 1));
164
165 for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
166 output_ptr.resize(num_elements);
167
168 for (i = 0; i < 4; i++)
169 {
170 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
171 sizeof(T) * num_elements, NULL, &err);
172 test_error(err, "clCreateBuffer failed");
173 }
174
175 std::string pragma_str;
176 if (std::is_same<T, float>::value)
177 {
178 for (i = 0; i < num_elements; i++)
179 {
180 input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d);
181 input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d);
182 input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d);
183 }
184 }
185 else if (std::is_same<T, double>::value)
186 {
187 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
188 for (i = 0; i < num_elements; i++)
189 {
190 input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d);
191 input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d);
192 input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d);
193 }
194 }
195 else if (std::is_same<T, half>::value)
196 {
197 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
198 for (i = 0; i < num_elements; i++)
199 {
200 input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d));
201 input_ptr[1][i] = conv_to_half(
202 get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d));
203 input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d));
204 }
205 }
206
207 for (i = 0; i < 3; i++)
208 {
209 err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
210 sizeof(T) * num_elements,
211 &input_ptr[i].front(), 0, NULL, NULL);
212 test_error(err, "Unable to write input buffer");
213 }
214
215 const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
216
217 for (i = 0; i < kTotalVecCount; i++)
218 {
219 std::string kernelSource;
220 if (i >= kVectorSizeCount)
221 {
222 if (vecParam)
223 {
224 std::string str = smoothstep_fn_code_pattern_v3;
225 kernelSource =
226 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
227 tname.c_str(), tname.c_str(), tname.c_str());
228 }
229 else
230 {
231 std::string str = smoothstep_fn_code_pattern_v3_scalar;
232 kernelSource =
233 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
234 tname.c_str(), tname.c_str(), tname.c_str());
235 }
236 }
237 else
238 {
239 // regular path
240 std::string str = smoothstep_fn_code_pattern;
241 kernelSource =
242 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
243 vecParam ? vecSizeNames[i] : "", tname.c_str(),
244 vecParam ? vecSizeNames[i] : "", tname.c_str(),
245 vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
246 }
247
248 const char *programPtr = kernelSource.c_str();
249 err =
250 create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
251 (const char **)&programPtr, "test_fn");
252 test_error(err, "Unable to create kernel");
253
254 for (int j = 0; j < 4; j++)
255 {
256 err =
257 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
258 test_error(err, "Unable to set kernel argument");
259 }
260
261 size_t threads = (size_t)n_elems;
262
263 err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
264 0, NULL, NULL);
265 test_error(err, "Unable to execute kernel");
266
267 err = clEnqueueReadBuffer(queue, streams[3], true, 0,
268 sizeof(T) * num_elements, &output_ptr[0], 0,
269 NULL, NULL);
270 test_error(err, "Unable to read results");
271
272 if (verify_smoothstep((T *)&input_ptr[0].front(),
273 (T *)&input_ptr[1].front(),
274 (T *)&input_ptr[2].front(), &output_ptr[0],
275 n_elems, g_arrVecSizes[i], vecParam))
276 {
277 log_error("smoothstep %s%d%s test failed\n", tname.c_str(),
278 ((g_arrVecSizes[i])),
279 vecParam ? "" : std::string(", " + tname).c_str());
280 err = -1;
281 }
282 else
283 {
284 log_info("smoothstep %s%d%s test passed\n", tname.c_str(),
285 ((g_arrVecSizes[i])),
286 vecParam ? "" : std::string(", " + tname).c_str());
287 err = 0;
288 }
289
290 if (err) break;
291 }
292
293 return err;
294 }
295
Run()296 cl_int SmoothstepTest::Run()
297 {
298 cl_int error = CL_SUCCESS;
299 if (is_extension_available(device, "cl_khr_fp16"))
300 {
301 error = test_smoothstep_fn<half>(device, context, queue, num_elems,
302 vecParam);
303 test_error(error, "SmoothstepTest::Run<cl_half> failed");
304 }
305
306 error =
307 test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam);
308 test_error(error, "SmoothstepTest::Run<float> failed");
309
310 if (is_extension_available(device, "cl_khr_fp64"))
311 {
312 error = test_smoothstep_fn<double>(device, context, queue, num_elems,
313 vecParam);
314 test_error(error, "SmoothstepTest::Run<double> failed");
315 }
316
317 return error;
318 }
319
test_smoothstep(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)320 int test_smoothstep(cl_device_id device, cl_context context,
321 cl_command_queue queue, int n_elems)
322 {
323 return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
324 "smoothstep", true);
325 }
326
test_smoothstepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)327 int test_smoothstepf(cl_device_id device, cl_context context,
328 cl_command_queue queue, int n_elems)
329 {
330 return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
331 "smoothstep", false);
332 }
333