xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/commonfns/test_step.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include "harness/stringHelpers.h"
22 
23 #include "procs.h"
24 #include "test_base.h"
25 
// Kernel source template for the scalar and 2/4/8/16-wide variants of step().
// Format arguments, in order: an optional extension pragma line, then a
// (type name, vector-size suffix) pair for each of edge, x and dst.
const char *step_fn_code_pattern =
    "%s\n" // optional pragma
    "__kernel void test_fn("
    "__global %s%s *edge, "
    "__global %s%s *x, "
    "__global %s%s *dst)\n"
    "{\n"
    "    int  tid = get_global_id(0);\n"
    "    dst[tid] = step(edge[tid], x[tid]);\n"
    "}\n";
33 
// Kernel source template for the 3-component case where both edge and x are
// vectors. All three buffers are declared with the scalar element type and
// accessed through vload3/vstore3.
// Format arguments, in order: an optional extension pragma line, then the
// element type name for edge, x and dst.
const char *step_fn_code_pattern_v3 =
    "%s\n" // optional pragma
    "__kernel void test_fn("
    "__global %s *edge, "
    "__global %s *x, "
    "__global %s *dst)\n"
    "{\n"
    "    int  tid = get_global_id(0);\n"
    "    vstore3(step(vload3(tid,edge), vload3(tid,x)), tid, dst);\n"
    "}\n";
42 
// Kernel source template for the 3-component case with a scalar edge: each
// work-item reads one edge value and applies it to a 3-wide x loaded with
// vload3, storing the result with vstore3.
// Format arguments, in order: an optional extension pragma line, then the
// element type name for edge, x and dst.
const char *step_fn_code_pattern_v3_scalar =
    "%s\n" // optional pragma
    "__kernel void test_fn("
    "__global %s *edge, "
    "__global %s *x, "
    "__global %s *dst)\n"
    "{\n"
    "    int  tid = get_global_id(0);\n"
    "    vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n"
    "}\n";
51 
52 namespace {
53 
54 template <typename T>
verify_step(const T * const inptrA,const T * const inptrB,const T * const outptr,const int n,const int veclen,const bool vecParam)55 int verify_step(const T *const inptrA, const T *const inptrB,
56                 const T *const outptr, const int n, const int veclen,
57                 const bool vecParam)
58 {
59     T r;
60 
61     if (vecParam)
62     {
63         for (int i = 0; i < n * veclen; i++)
64         {
65             r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0;
66             if (r != conv_to_dbl(outptr[i])) return -1;
67         }
68     }
69     else
70     {
71         for (int i = 0; i < n;)
72         {
73             int ii = i / veclen;
74             for (int j = 0; j < veclen && i < n; ++j, ++i)
75             {
76                 r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f
77                                                                        : 1.0f;
78                 if (r != conv_to_dbl(outptr[i]))
79                 {
80                     if (std::is_same<T, half>::value)
81                         log_error(
82                             "Failure @ {%d, element %d}: step(%a,%a) -> *%a "
83                             "vs %a\n",
84                             ii, j, conv_to_flt(inptrA[ii]),
85                             conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i]));
86                     else
87                         log_error(
88                             "Failure @ {%d, element %d}: step(%a,%a) -> *%a "
89                             "vs %a\n",
90                             ii, j, inptrA[ii], inptrB[i], r, outptr[i]);
91                     return -1;
92                 }
93             }
94         }
95     }
96     return 0;
97 }
98 
99 }
100 
101 template <typename T>
test_step_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,bool vecParam)102 int test_step_fn(cl_device_id device, cl_context context,
103                  cl_command_queue queue, int n_elems, bool vecParam)
104 {
105     clMemWrapper streams[3];
106     std::vector<T> input_ptr[2], output_ptr;
107 
108     std::vector<clProgramWrapper> programs;
109     std::vector<clKernelWrapper> kernels;
110 
111     int err, i;
112     MTdataHolder d = MTdataHolder(gRandomSeed);
113 
114     assert(BaseFunctionTest::type2name.find(sizeof(T))
115            != BaseFunctionTest::type2name.end());
116     auto tname = BaseFunctionTest::type2name[sizeof(T)];
117     int num_elements = n_elems * (1 << (kTotalVecCount - 1));
118 
119     programs.resize(kTotalVecCount);
120     kernels.resize(kTotalVecCount);
121 
122     for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
123     output_ptr.resize(num_elements);
124 
125     for (i = 0; i < 3; i++)
126     {
127         streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
128                                     sizeof(T) * num_elements, NULL, &err);
129         test_error(err, "clCreateBuffer failed");
130     }
131 
132     std::string pragma_str;
133     if (std::is_same<T, float>::value)
134     {
135         for (i = 0; i < num_elements; i++)
136         {
137             input_ptr[0][i] = get_random_float(-0x40000000, 0x40000000, d);
138             input_ptr[1][i] = get_random_float(-0x40000000, 0x40000000, d);
139         }
140     }
141     else if (std::is_same<T, double>::value)
142     {
143         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
144         for (i = 0; i < num_elements; i++)
145         {
146             input_ptr[0][i] = get_random_double(-0x40000000, 0x40000000, d);
147             input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d);
148         }
149     }
150     else if (std::is_same<T, half>::value)
151     {
152         const float fval = CL_HALF_MAX;
153         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
154         for (i = 0; i < num_elements; i++)
155         {
156             input_ptr[0][i] = conv_to_half(get_random_float(-fval, fval, d));
157             input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d));
158         }
159     }
160 
161     for (i = 0; i < 2; i++)
162     {
163         err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
164                                    sizeof(T) * num_elements,
165                                    &input_ptr[i].front(), 0, NULL, NULL);
166         test_error(err, "Unable to write input buffer");
167     }
168 
169     char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
170 
171     for (i = 0; i < kTotalVecCount; i++)
172     {
173         std::string kernelSource;
174         if (i >= kVectorSizeCount)
175         {
176             if (vecParam)
177             {
178                 std::string str = step_fn_code_pattern_v3;
179                 kernelSource =
180                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
181                                 tname.c_str(), tname.c_str());
182             }
183             else
184             {
185                 std::string str = step_fn_code_pattern_v3_scalar;
186                 kernelSource =
187                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
188                                 tname.c_str(), tname.c_str());
189             }
190         }
191         else
192         {
193             // regular path
194             std::string str = step_fn_code_pattern;
195             kernelSource =
196                 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
197                             vecParam ? vecSizeNames[i] : "", tname.c_str(),
198                             vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
199         }
200         const char *programPtr = kernelSource.c_str();
201         err =
202             create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
203                                         (const char **)&programPtr, "test_fn");
204         test_error(err, "Unable to create kernel");
205 
206         for (int j = 0; j < 3; j++)
207         {
208             err =
209                 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
210             test_error(err, "Unable to set kernel argument");
211         }
212 
213         size_t threads = (size_t)n_elems;
214 
215         err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
216                                      0, NULL, NULL);
217         test_error(err, "Unable to execute kernel");
218 
219         err = clEnqueueReadBuffer(queue, streams[2], true, 0,
220                                   sizeof(T) * num_elements, &output_ptr[0], 0,
221                                   NULL, NULL);
222         test_error(err, "Unable to read results");
223 
224         err = verify_step(&input_ptr[0].front(), &input_ptr[1].front(),
225                           &output_ptr.front(), n_elems, g_arrVecSizes[i],
226                           vecParam);
227         if (err)
228         {
229             log_error("step %s%d%s test failed\n", tname.c_str(),
230                       ((g_arrVecSizes[i])),
231                       vecParam ? "" : std::string(", " + tname).c_str());
232             err = -1;
233         }
234         else
235         {
236             log_info("step %s%d%s test passed\n", tname.c_str(),
237                      ((g_arrVecSizes[i])),
238                      vecParam ? "" : std::string(", " + tname).c_str());
239             err = 0;
240         }
241 
242         if (err)
243             break;
244     }
245 
246     return err;
247 }
248 
Run()249 cl_int StepTest::Run()
250 {
251     cl_int error = CL_SUCCESS;
252     if (is_extension_available(device, "cl_khr_fp16"))
253     {
254         error = test_step_fn<half>(device, context, queue, num_elems, vecParam);
255         test_error(error, "StepTest::Run<cl_half> failed");
256     }
257 
258     error = test_step_fn<float>(device, context, queue, num_elems, vecParam);
259     test_error(error, "StepTest::Run<float> failed");
260 
261     if (is_extension_available(device, "cl_khr_fp64"))
262     {
263         error =
264             test_step_fn<double>(device, context, queue, num_elems, vecParam);
265         test_error(error, "StepTest::Run<double> failed");
266     }
267 
268     return error;
269 }
270 
test_step(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)271 int test_step(cl_device_id device, cl_context context, cl_command_queue queue,
272               int n_elems)
273 {
274     return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
275                                     true);
276 }
277 
test_stepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)278 int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue,
279                int n_elems)
280 {
281     return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
282                                     false);
283 }
284