xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/integer_ops/test_integer_dot_product.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <algorithm>
18 #include <limits>
19 #include <numeric>
20 #include <string>
21 #include <vector>
22 
23 #include "procs.h"
24 #include "harness/integer_ops_test_info.h"
25 #include "harness/testHarness.h"
26 
27 template <size_t N, typename DstType, typename SrcTypeA, typename SrcTypeB>
28 static void
calculate_reference(std::vector<DstType> & ref,const std::vector<SrcTypeA> & a,const std::vector<SrcTypeB> & b,const bool AccSat=false,const std::vector<DstType> & acc={})29 calculate_reference(std::vector<DstType>& ref, const std::vector<SrcTypeA>& a,
30                     const std::vector<SrcTypeB>& b, const bool AccSat = false,
31                     const std::vector<DstType>& acc = {})
32 {
33     assert(a.size() == b.size());
34     assert(AccSat == false || acc.size() == a.size() / N);
35 
36     ref.resize(a.size() / N);
37     for (size_t r = 0; r < ref.size(); r++)
38     {
39         cl_long result = AccSat ? acc[r] : 0;
40         for (size_t c = 0; c < N; c++)
41         {
42             // OK to assume no overflow?
43             result += a[r * N + c] * b[r * N + c];
44         }
45         if (AccSat && result > std::numeric_limits<DstType>::max())
46         {
47             result = std::numeric_limits<DstType>::max();
48         }
49         ref[r] = static_cast<DstType>(result);
50     }
51 }
52 
53 template <typename SrcTypeA, typename SrcTypeB>
generate_inputs_with_special_values(std::vector<SrcTypeA> & a,std::vector<SrcTypeB> & b)54 void generate_inputs_with_special_values(std::vector<SrcTypeA>& a,
55                                          std::vector<SrcTypeB>& b)
56 {
57     const std::vector<SrcTypeA> specialValuesA(
58         { static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min()),
59           static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() + 1),
60           static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() / 2), 0,
61           static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() / 2),
62           static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() - 1),
63           static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max()) });
64     const std::vector<SrcTypeB> specialValuesB(
65         { static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min()),
66           static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() + 1),
67           static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() / 2), 0,
68           static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() / 2),
69           static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() - 1),
70           static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max()) });
71 
72     size_t count = 0;
73     for (auto svA : specialValuesA)
74     {
75         for (auto svB : specialValuesB)
76         {
77             a[count] = svA;
78             b[count] = svB;
79             ++count;
80         }
81     }
82 
83     // Generate random data for the rest of the inputs:
84     MTdataHolder d(gRandomSeed);
85     generate_random_data(TestInfo<SrcTypeA>::explicitType, a.size() - count, d,
86                          a.data() + count);
87     generate_random_data(TestInfo<SrcTypeB>::explicitType, b.size() - count, d,
88                          b.data() + count);
89 }
90 
91 template <typename SrcType>
generate_acc_sat_inputs(std::vector<SrcType> & acc)92 void generate_acc_sat_inputs(std::vector<SrcType>& acc)
93 {
94     // First generate random data:
95     fill_vector_with_random_data(acc);
96 
97     // Now go through the generated data, and make every other element large.
98     // This ensures we have some elements that need saturation.
99     for (size_t i = 0; i < acc.size(); i += 2)
100     {
101         acc[i] = std::numeric_limits<SrcType>::max() - acc[i];
102     }
103 }
104 
105 template <typename T> struct PackedTestInfo
106 {
107     static constexpr const char* deviceTypeName = "UNSUPPORTED";
108 };
109 template <> struct PackedTestInfo<cl_char>
110 {
111     static constexpr const char* deviceTypeName = "int";
112 };
113 template <> struct PackedTestInfo<cl_uchar>
114 {
115     static constexpr const char* deviceTypeName = "uint";
116 };
117 
118 static constexpr const char* kernel_source_dot = R"CLC(
119 __kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b)
120 {
121     int index = get_global_id(0);
122     dst[index] = DOT(a[index], b[index]);
123 }
124 )CLC";
125 
126 static constexpr const char* kernel_source_dot_acc_sat = R"CLC(
127 __kernel void test_dot_acc_sat(
128     __global DSTTYPE* dst,
129     __global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc)
130 {
131     int index = get_global_id(0);
132     dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]);
133 }
134 )CLC";
135 
136 template <typename DstType, typename SrcTypeA, typename SrcTypeB, size_t N>
test_case_dot(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool packed,bool sat)137 static int test_case_dot(cl_device_id deviceID, cl_context context,
138                          cl_command_queue queue, int num_elements, bool packed,
139                          bool sat)
140 {
141     log_info("    testing %s = dot%s%s(%s, %s)\n",
142              std::numeric_limits<DstType>::is_signed ? "signed" : "unsigned",
143              sat ? "_acc_sat" : "", packed ? "_packed" : "",
144              std::numeric_limits<SrcTypeA>::is_signed ? "signed" : "unsigned",
145              std::numeric_limits<SrcTypeB>::is_signed ? "signed" : "unsigned");
146 
147     cl_int error = CL_SUCCESS;
148 
149     clProgramWrapper program;
150     clKernelWrapper kernel;
151 
152     std::string buildOptions;
153     buildOptions += " -DDSTTYPE=";
154     buildOptions += TestInfo<DstType>::deviceTypeName;
155     buildOptions += " -DSRCTYPEA=";
156     buildOptions += packed
157         ? PackedTestInfo<SrcTypeA>::deviceTypeName
158         : TestInfo<SrcTypeA>::deviceTypeName + std::to_string(N);
159     buildOptions += " -DSRCTYPEB=";
160     buildOptions += packed
161         ? PackedTestInfo<SrcTypeB>::deviceTypeName
162         : TestInfo<SrcTypeB>::deviceTypeName + std::to_string(N);
163     std::string packedSuffix;
164     packedSuffix += std::numeric_limits<SrcTypeA>::is_signed ? "s" : "u";
165     packedSuffix += std::numeric_limits<SrcTypeB>::is_signed ? "s" : "u";
166     packedSuffix += std::numeric_limits<DstType>::is_signed ? "_int" : "_uint";
167     if (sat)
168     {
169         buildOptions += packed
170             ? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix
171             : " -DDOT_ACC_SAT=dot_acc_sat";
172     }
173     else
174     {
175         buildOptions +=
176             packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot";
177     }
178 
179     std::vector<SrcTypeA> a(N * num_elements);
180     std::vector<SrcTypeB> b(N * num_elements);
181     generate_inputs_with_special_values(a, b);
182 
183     std::vector<DstType> acc;
184     if (sat)
185     {
186         acc.resize(num_elements);
187         generate_acc_sat_inputs(acc);
188     }
189 
190     std::vector<DstType> reference(num_elements);
191     calculate_reference<N>(reference, a, b, sat, acc);
192 
193     const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot;
194     const char* name = sat ? "test_dot_acc_sat" : "test_dot";
195     error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
196                                         name, buildOptions.c_str());
197     test_error(error, "Unable to create test kernel");
198 
199     clMemWrapper dst = clCreateBuffer(
200         context, 0, reference.size() * sizeof(DstType), NULL, &error);
201     test_error(error, "Unable to create output buffer");
202 
203     clMemWrapper srcA =
204         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
205                        a.size() * sizeof(SrcTypeA), a.data(), &error);
206     test_error(error, "Unable to create srcA buffer");
207 
208     clMemWrapper srcB =
209         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
210                        b.size() * sizeof(SrcTypeB), b.data(), &error);
211     test_error(error, "Unable to create srcB buffer");
212 
213     clMemWrapper srcAcc;
214     if (sat)
215     {
216         srcAcc =
217             clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
218                            acc.size() * sizeof(DstType), acc.data(), &error);
219         test_error(error, "Unable to create acc buffer");
220     }
221 
222     error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
223     test_error(error, "Unable to set output buffer kernel arg");
224 
225     error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA);
226     test_error(error, "Unable to set srcA buffer kernel arg");
227 
228     error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB);
229     test_error(error, "Unable to set srcB buffer kernel arg");
230 
231     if (sat)
232     {
233         error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc);
234         test_error(error, "Unable to set acc buffer kernel arg");
235     }
236 
237     size_t global_work_size[] = { reference.size() };
238     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
239                                    NULL, 0, NULL, NULL);
240     test_error(error, "Unable to enqueue test kernel");
241 
242     error = clFinish(queue);
243     test_error(error, "clFinish failed after test kernel");
244 
245     std::vector<DstType> results(reference.size(), 99);
246     error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0,
247                                 results.size() * sizeof(DstType),
248                                 results.data(), 0, NULL, NULL);
249     test_error(error, "Unable to read data after test kernel");
250 
251     if (results != reference)
252     {
253         log_error("Result buffer did not match reference buffer!\n");
254         return TEST_FAIL;
255     }
256 
257     return TEST_PASS;
258 }
259 
260 template <typename SrcType, typename DstType, size_t N>
test_vectype(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)261 static int test_vectype(cl_device_id deviceID, cl_context context,
262                         cl_command_queue queue, int num_elements)
263 {
264     int result = TEST_PASS;
265 
266     typedef typename std::make_signed<SrcType>::type SSrcType;
267     typedef typename std::make_signed<DstType>::type SDstType;
268 
269     typedef typename std::make_unsigned<SrcType>::type USrcType;
270     typedef typename std::make_unsigned<DstType>::type UDstType;
271 
272     // dot testing:
273     result |= test_case_dot<UDstType, USrcType, USrcType, N>(
274         deviceID, context, queue, num_elements, false, false);
275     result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
276         deviceID, context, queue, num_elements, false, false);
277     result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
278         deviceID, context, queue, num_elements, false, false);
279     result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
280         deviceID, context, queue, num_elements, false, false);
281 
282     // dot_acc_sat testing:
283     result |= test_case_dot<UDstType, USrcType, USrcType, N>(
284         deviceID, context, queue, num_elements, false, true);
285     result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
286         deviceID, context, queue, num_elements, false, true);
287     result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
288         deviceID, context, queue, num_elements, false, true);
289     result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
290         deviceID, context, queue, num_elements, false, true);
291 
292     return result;
293 }
294 
295 template <typename SrcType, typename DstType, size_t N>
test_vectype_packed(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)296 static int test_vectype_packed(cl_device_id deviceID, cl_context context,
297                                cl_command_queue queue, int num_elements)
298 {
299     int result = TEST_PASS;
300 
301     typedef typename std::make_signed<SrcType>::type SSrcType;
302     typedef typename std::make_signed<DstType>::type SDstType;
303 
304     typedef typename std::make_unsigned<SrcType>::type USrcType;
305     typedef typename std::make_unsigned<DstType>::type UDstType;
306 
307     // packed dot testing:
308     result |= test_case_dot<UDstType, USrcType, USrcType, N>(
309         deviceID, context, queue, num_elements, true, false);
310     result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
311         deviceID, context, queue, num_elements, true, false);
312     result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
313         deviceID, context, queue, num_elements, true, false);
314     result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
315         deviceID, context, queue, num_elements, true, false);
316 
317     // packed dot_acc_sat testing:
318     result |= test_case_dot<UDstType, USrcType, USrcType, N>(
319         deviceID, context, queue, num_elements, true, true);
320     result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
321         deviceID, context, queue, num_elements, true, true);
322     result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
323         deviceID, context, queue, num_elements, true, true);
324     result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
325         deviceID, context, queue, num_elements, true, true);
326 
327     return result;
328 }
329 
test_integer_dot_product(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)330 int test_integer_dot_product(cl_device_id deviceID, cl_context context,
331                              cl_command_queue queue, int num_elements)
332 {
333     if (!is_extension_available(deviceID, "cl_khr_integer_dot_product"))
334     {
335         log_info("cl_khr_integer_dot_product is not supported\n");
336         return TEST_SKIPPED_ITSELF;
337     }
338 
339     Version deviceVersion = get_device_cl_version(deviceID);
340     cl_version extensionVersion;
341 
342     if ((deviceVersion >= Version(3, 0))
343         || is_extension_available(deviceID, "cl_khr_extended_versioning"))
344     {
345         extensionVersion =
346             get_extension_version(deviceID, "cl_khr_integer_dot_product");
347     }
348     else
349     {
350         // Assume 1.0.0 is supported if the version can't be queried
351         extensionVersion = CL_MAKE_VERSION(1, 0, 0);
352     }
353 
354     cl_int error = CL_SUCCESS;
355     int result = TEST_PASS;
356 
357     cl_device_integer_dot_product_capabilities_khr dotCaps = 0;
358     error = clGetDeviceInfo(deviceID,
359                             CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
360                             sizeof(dotCaps), &dotCaps, NULL);
361     test_error(
362         error,
363         "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR");
364 
365     // Check that the required capabilities are reported
366     test_assert_error(
367         dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR,
368         "When cl_khr_integer_dot_product is supported "
369         "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be "
370         "supported");
371 
372     if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0))
373     {
374         test_assert_error(
375             dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR,
376             "When cl_khr_integer_dot_product is supported with version >= 2.0.0"
377             "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR must be "
378             "supported");
379     }
380 
381     // Check that acceleration properties can be queried
382     if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0))
383     {
384         size_t size_ret;
385         error = clGetDeviceInfo(
386             deviceID,
387             CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 0,
388             nullptr, &size_ret);
389         test_error(
390             error,
391             "Unable to query size of data returned by "
392             "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR");
393 
394         cl_device_integer_dot_product_acceleration_properties_khr
395             accelerationProperties;
396         error = clGetDeviceInfo(
397             deviceID,
398             CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR,
399             sizeof(accelerationProperties), &accelerationProperties, nullptr);
400         test_error(error, "Unable to query 8-bit acceleration properties");
401 
402         error = clGetDeviceInfo(
403             deviceID,
404             CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR,
405             0, nullptr, &size_ret);
406         test_error(
407             error,
408             "Unable to query size of data returned by "
409             "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_"
410             "PACKED_KHR");
411 
412         error = clGetDeviceInfo(
413             deviceID,
414             CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR,
415             sizeof(accelerationProperties), &accelerationProperties, nullptr);
416         test_error(error,
417                    "Unable to query 4x8-bit packed acceleration properties");
418     }
419 
420     // Report when unknown capabilities are found
421     if (dotCaps
422         & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR
423             | CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
424     {
425         log_info("NOTE: found an unknown / untested capability!\n");
426     }
427 
428     // Test built-in functions
429     if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)
430     {
431         result |= test_vectype<cl_uchar, cl_uint, 4>(deviceID, context, queue,
432                                                      num_elements);
433     }
434 
435     if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR)
436     {
437         result |= test_vectype_packed<cl_uchar, cl_uint, 4>(
438             deviceID, context, queue, num_elements);
439     }
440 
441     return result;
442 }
443