xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_barrier.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <algorithm>
25 #include <numeric>
26 #include <vector>
27 
28 #include "procs.h"
29 
30 namespace {
31 const char *barrier_kernel_code = R"(
32 __kernel void compute_sum(__global int *a, int n, __global int *tmp_sum,
33                           __global int *sum)
34 {
35     int tid = get_local_id(0);
36     int lsize = get_local_size(0);
37     int i;
38 
39     tmp_sum[tid] = 0;
40     for (i = tid; i < n; i += lsize) tmp_sum[tid] += a[i];
41 
42     // updated to work for any workgroup size
43     for (i = hadd(lsize, 1); lsize > 1; i = hadd(i, 1))
44     {
45         BARRIER(CLK_GLOBAL_MEM_FENCE);
46         if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i];
47         lsize = i;
48     }
49 
50     // no barrier is required here because last person to write to tmp_sum[0]
51     // was tid 0
52     if (tid == 0) *sum = tmp_sum[0];
53 }
54 )";
55 
56 
generate_random_inputs(std::vector<cl_int> & v)57 void generate_random_inputs(std::vector<cl_int> &v)
58 {
59     RandomSeed seed(gRandomSeed);
60 
61     auto random_generator = [&seed]() {
62         return static_cast<cl_int>(
63             get_random_float(-0x01000000, 0x01000000, seed));
64     };
65 
66     std::generate(v.begin(), v.end(), random_generator);
67 }
68 
test_barrier_common(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,std::string barrier_str)69 int test_barrier_common(cl_device_id device, cl_context context,
70                         cl_command_queue queue, int num_elements,
71                         std::string barrier_str)
72 {
73     clMemWrapper streams[3];
74     clProgramWrapper program;
75     clKernelWrapper kernel;
76 
77     cl_int output;
78     int err;
79 
80     size_t max_threadgroup_size = 0;
81     std::string build_options = std::string("-DBARRIER=") + barrier_str;
82     err = create_single_kernel_helper(context, &program, &kernel, 1,
83                                       &barrier_kernel_code, "compute_sum",
84                                       build_options.c_str());
85     test_error(err, "Failed to build kernel/program.");
86 
87     err = get_max_allowed_1d_work_group_size_on_device(device, kernel,
88                                                        &max_threadgroup_size);
89     test_error(err, "get_max_allowed_1d_work_group_size_on_device failed.");
90 
91     // work group size must divide evenly into the global size
92     while (num_elements % max_threadgroup_size) max_threadgroup_size--;
93 
94     std::vector<cl_int> input(num_elements);
95 
96     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
97                                 sizeof(cl_int) * num_elements, nullptr, &err);
98     test_error(err, "clCreateBuffer failed.");
99     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int),
100                                 nullptr, &err);
101     test_error(err, "clCreateBuffer failed.");
102     streams[2] =
103         clCreateBuffer(context, CL_MEM_READ_WRITE,
104                        sizeof(cl_int) * max_threadgroup_size, nullptr, &err);
105     test_error(err, "clCreateBuffer failed.");
106 
107     generate_random_inputs(input);
108 
109     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
110                                sizeof(cl_int) * num_elements, input.data(), 0,
111                                nullptr, nullptr);
112     test_error(err, "clEnqueueWriteBuffer failed.");
113 
114     err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
115     err |= clSetKernelArg(kernel, 1, sizeof(num_elements), &num_elements);
116     err |= clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]);
117     err |= clSetKernelArg(kernel, 3, sizeof(streams[1]), &streams[1]);
118     test_error(err, "clSetKernelArg failed.");
119 
120     size_t global_threads[] = { max_threadgroup_size };
121     size_t local_threads[] = { max_threadgroup_size };
122 
123     err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads,
124                                  local_threads, 0, nullptr, nullptr);
125     test_error(err, "clEnqueueNDRangeKernel failed.");
126 
127     err = clEnqueueReadBuffer(queue, streams[1], true, 0, sizeof(cl_int),
128                               &output, 0, nullptr, nullptr);
129     test_error(err, "clEnqueueReadBuffer failed.");
130 
131     if (std::accumulate(input.begin(), input.end(), 0) != output)
132     {
133         log_error("%s test failed\n", barrier_str.c_str());
134         err = -1;
135     }
136     else
137     {
138         log_info("%s test passed\n", barrier_str.c_str());
139     }
140 
141     return err;
142 }
143 }
144 
test_barrier(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)145 int test_barrier(cl_device_id device, cl_context context,
146                  cl_command_queue queue, int num_elements)
147 {
148     return test_barrier_common(device, context, queue, num_elements, "barrier");
149 }
150 
test_wg_barrier(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)151 int test_wg_barrier(cl_device_id device, cl_context context,
152                     cl_command_queue queue, int num_elements)
153 {
154     return test_barrier_common(device, context, queue, num_elements,
155                                "work_group_barrier");
156 }
157