xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/c11_atomics/test_atomics.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
16 #include "harness/testHarness.h"
17 #include "harness/kernelHelpers.h"
18 #include "harness/typeWrappers.h"
19 
20 #include "common.h"
21 #include "host_atomics.h"
22 
23 #include <sstream>
24 #include <vector>
25 
26 template <typename HostAtomicType, typename HostDataType>
27 class CBasicTestStore
28     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
29 public:
30     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
31     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
32     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
33     using CBasicTestMemOrderScope<HostAtomicType,
34                                   HostDataType>::MemoryOrderScopeStr;
35     using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestStore(TExplicitAtomicType dataType,bool useSVM)36     CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
37         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
38                                                                 useSVM)
39     {
40         OldValueCheck(false);
41     }
NumResults(cl_uint threadCount,cl_device_id deviceID)42     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
43     {
44         return threadCount;
45     }
ExecuteSingleTest(cl_device_id deviceID,cl_context context,cl_command_queue queue)46     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
47                                   cl_command_queue queue)
48     {
49         if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
50             || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
51             return 0; // skip test - not applicable
52 
53         if (CheckCapabilities(MemoryScope(), MemoryOrder())
54             == TEST_SKIPPED_ITSELF)
55             return 0; // skip test - not applicable
56 
57         return CBasicTestMemOrderScope<
58             HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
59                                                              queue);
60     }
ProgramCore()61     virtual std::string ProgramCore()
62     {
63         std::string memoryOrderScope = MemoryOrderScopeStr();
64         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
65         return "  atomic_store" + postfix + "(&destMemory[tid], tid"
66             + memoryOrderScope + ");\n";
67     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)68     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
69                               volatile HostAtomicType *destMemory,
70                               HostDataType *oldValues)
71     {
72         host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
73     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)74     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
75                                HostDataType *startRefValues,
76                                cl_uint whichDestValue)
77     {
78         expected = (HostDataType)whichDestValue;
79         return true;
80     }
81 };
82 
test_atomic_store_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)83 int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
84                               cl_command_queue queue, int num_elements,
85                               bool useSVM)
86 {
87     int error = 0;
88     CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
89                                                         useSVM);
90     EXECUTE_TEST(error,
91                  test_int.Execute(deviceID, context, queue, num_elements));
92     CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
93                                                            useSVM);
94     EXECUTE_TEST(error,
95                  test_uint.Execute(deviceID, context, queue, num_elements));
96     CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
97                                                            useSVM);
98     EXECUTE_TEST(error,
99                  test_long.Execute(deviceID, context, queue, num_elements));
100     CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
101                                                               useSVM);
102     EXECUTE_TEST(error,
103                  test_ulong.Execute(deviceID, context, queue, num_elements));
104     CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
105                                                               useSVM);
106     EXECUTE_TEST(error,
107                  test_float.Execute(deviceID, context, queue, num_elements));
108     CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
109         TYPE_ATOMIC_DOUBLE, useSVM);
110     EXECUTE_TEST(error,
111                  test_double.Execute(deviceID, context, queue, num_elements));
112     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
113     {
114         CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
115             TYPE_ATOMIC_INTPTR_T, useSVM);
116         EXECUTE_TEST(
117             error,
118             test_intptr_t.Execute(deviceID, context, queue, num_elements));
119         CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
120             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
121         EXECUTE_TEST(
122             error,
123             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
124         CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
125             TYPE_ATOMIC_SIZE_T, useSVM);
126         EXECUTE_TEST(
127             error, test_size_t.Execute(deviceID, context, queue, num_elements));
128         CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
129             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
130         EXECUTE_TEST(
131             error,
132             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
133     }
134     else
135     {
136         CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
137             TYPE_ATOMIC_INTPTR_T, useSVM);
138         EXECUTE_TEST(
139             error,
140             test_intptr_t.Execute(deviceID, context, queue, num_elements));
141         CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
142             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
143         EXECUTE_TEST(
144             error,
145             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
146         CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
147             TYPE_ATOMIC_SIZE_T, useSVM);
148         EXECUTE_TEST(
149             error, test_size_t.Execute(deviceID, context, queue, num_elements));
150         CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
151             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
152         EXECUTE_TEST(
153             error,
154             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
155     }
156     return error;
157 }
158 
test_atomic_store(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)159 int test_atomic_store(cl_device_id deviceID, cl_context context,
160                       cl_command_queue queue, int num_elements)
161 {
162     return test_atomic_store_generic(deviceID, context, queue, num_elements,
163                                      false);
164 }
165 
test_svm_atomic_store(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)166 int test_svm_atomic_store(cl_device_id deviceID, cl_context context,
167                           cl_command_queue queue, int num_elements)
168 {
169     return test_atomic_store_generic(deviceID, context, queue, num_elements,
170                                      true);
171 }
172 
173 template <typename HostAtomicType, typename HostDataType>
174 class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType> {
175 public:
176     using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
CBasicTestInit(TExplicitAtomicType dataType,bool useSVM)177     CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
178         : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
179     {
180         OldValueCheck(false);
181     }
NumResults(cl_uint threadCount,cl_device_id deviceID)182     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
183     {
184         return threadCount;
185     }
ProgramCore()186     virtual std::string ProgramCore()
187     {
188         return "  atomic_init(&destMemory[tid], tid);\n";
189     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)190     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
191                               volatile HostAtomicType *destMemory,
192                               HostDataType *oldValues)
193     {
194         host_atomic_init(&destMemory[tid], (HostDataType)tid);
195     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)196     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
197                                HostDataType *startRefValues,
198                                cl_uint whichDestValue)
199     {
200         expected = (HostDataType)whichDestValue;
201         return true;
202     }
203 };
204 
test_atomic_init_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)205 int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
206                              cl_command_queue queue, int num_elements,
207                              bool useSVM)
208 {
209     int error = 0;
210     CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
211     EXECUTE_TEST(error,
212                  test_int.Execute(deviceID, context, queue, num_elements));
213     CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
214                                                           useSVM);
215     EXECUTE_TEST(error,
216                  test_uint.Execute(deviceID, context, queue, num_elements));
217     CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
218                                                           useSVM);
219     EXECUTE_TEST(error,
220                  test_long.Execute(deviceID, context, queue, num_elements));
221     CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
222                                                              useSVM);
223     EXECUTE_TEST(error,
224                  test_ulong.Execute(deviceID, context, queue, num_elements));
225     CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
226                                                              useSVM);
227     EXECUTE_TEST(error,
228                  test_float.Execute(deviceID, context, queue, num_elements));
229     CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
230         TYPE_ATOMIC_DOUBLE, useSVM);
231     EXECUTE_TEST(error,
232                  test_double.Execute(deviceID, context, queue, num_elements));
233     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
234     {
235         CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
236             TYPE_ATOMIC_INTPTR_T, useSVM);
237         EXECUTE_TEST(
238             error,
239             test_intptr_t.Execute(deviceID, context, queue, num_elements));
240         CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
241             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
242         EXECUTE_TEST(
243             error,
244             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
245         CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
246             TYPE_ATOMIC_SIZE_T, useSVM);
247         EXECUTE_TEST(
248             error, test_size_t.Execute(deviceID, context, queue, num_elements));
249         CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
250             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
251         EXECUTE_TEST(
252             error,
253             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
254     }
255     else
256     {
257         CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
258             TYPE_ATOMIC_INTPTR_T, useSVM);
259         EXECUTE_TEST(
260             error,
261             test_intptr_t.Execute(deviceID, context, queue, num_elements));
262         CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
263             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
264         EXECUTE_TEST(
265             error,
266             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
267         CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
268             TYPE_ATOMIC_SIZE_T, useSVM);
269         EXECUTE_TEST(
270             error, test_size_t.Execute(deviceID, context, queue, num_elements));
271         CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
272             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
273         EXECUTE_TEST(
274             error,
275             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
276     }
277     return error;
278 }
279 
test_atomic_init(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)280 int test_atomic_init(cl_device_id deviceID, cl_context context,
281                      cl_command_queue queue, int num_elements)
282 {
283     return test_atomic_init_generic(deviceID, context, queue, num_elements,
284                                     false);
285 }
286 
test_svm_atomic_init(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)287 int test_svm_atomic_init(cl_device_id deviceID, cl_context context,
288                          cl_command_queue queue, int num_elements)
289 {
290     return test_atomic_init_generic(deviceID, context, queue, num_elements,
291                                     true);
292 }
293 
294 template <typename HostAtomicType, typename HostDataType>
295 class CBasicTestLoad
296     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
297 public:
298     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
299     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
300     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
301     using CBasicTestMemOrderScope<HostAtomicType,
302                                   HostDataType>::MemoryOrderScopeStr;
303     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
304     using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestLoad(TExplicitAtomicType dataType,bool useSVM)305     CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
306         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
307                                                                 useSVM)
308     {
309         OldValueCheck(false);
310     }
NumResults(cl_uint threadCount,cl_device_id deviceID)311     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
312     {
313         return threadCount;
314     }
ExecuteSingleTest(cl_device_id deviceID,cl_context context,cl_command_queue queue)315     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
316                                   cl_command_queue queue)
317     {
318         if (MemoryOrder() == MEMORY_ORDER_RELEASE
319             || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
320             return 0; // skip test - not applicable
321 
322         if (CheckCapabilities(MemoryScope(), MemoryOrder())
323             == TEST_SKIPPED_ITSELF)
324             return 0; // skip test - not applicable
325 
326         return CBasicTestMemOrderScope<
327             HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
328                                                              queue);
329     }
ProgramCore()330     virtual std::string ProgramCore()
331     {
332         // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
333         // should be MEMORY_ORDER_RELEASE
334         std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
335         std::string memoryOrderScopeStore =
336             (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
337             ? (", memory_order_release" + MemoryScopeStr())
338             : memoryOrderScopeLoad;
339         std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
340         return "  atomic_store" + postfix + "(&destMemory[tid], tid"
341             + memoryOrderScopeStore
342             + ");\n"
343               "  oldValues[tid] = atomic_load"
344             + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
345     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)346     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
347                               volatile HostAtomicType *destMemory,
348                               HostDataType *oldValues)
349     {
350         host_atomic_store(&destMemory[tid], (HostDataType)tid,
351                           MEMORY_ORDER_SEQ_CST);
352         oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
353             &destMemory[tid], MemoryOrder());
354     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)355     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
356                                HostDataType *startRefValues,
357                                cl_uint whichDestValue)
358     {
359         expected = (HostDataType)whichDestValue;
360         return true;
361     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)362     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
363                             HostDataType *refValues,
364                             HostAtomicType *finalValues)
365     {
366         correct = true;
367         for (cl_uint i = 0; i < threadCount; i++)
368         {
369             if (refValues[i] != (HostDataType)i)
370             {
371                 log_error("Invalid value for thread %u\n", (cl_uint)i);
372                 correct = false;
373                 return true;
374             }
375         }
376         return true;
377     }
378 };
379 
test_atomic_load_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)380 int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
381                              cl_command_queue queue, int num_elements,
382                              bool useSVM)
383 {
384     int error = 0;
385     CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
386     EXECUTE_TEST(error,
387                  test_int.Execute(deviceID, context, queue, num_elements));
388     CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
389                                                           useSVM);
390     EXECUTE_TEST(error,
391                  test_uint.Execute(deviceID, context, queue, num_elements));
392     CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
393                                                           useSVM);
394     EXECUTE_TEST(error,
395                  test_long.Execute(deviceID, context, queue, num_elements));
396     CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
397                                                              useSVM);
398     EXECUTE_TEST(error,
399                  test_ulong.Execute(deviceID, context, queue, num_elements));
400     CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
401                                                              useSVM);
402     EXECUTE_TEST(error,
403                  test_float.Execute(deviceID, context, queue, num_elements));
404     CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
405         TYPE_ATOMIC_DOUBLE, useSVM);
406     EXECUTE_TEST(error,
407                  test_double.Execute(deviceID, context, queue, num_elements));
408     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
409     {
410         CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
411             TYPE_ATOMIC_INTPTR_T, useSVM);
412         EXECUTE_TEST(
413             error,
414             test_intptr_t.Execute(deviceID, context, queue, num_elements));
415         CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
416             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
417         EXECUTE_TEST(
418             error,
419             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
420         CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
421             TYPE_ATOMIC_SIZE_T, useSVM);
422         EXECUTE_TEST(
423             error, test_size_t.Execute(deviceID, context, queue, num_elements));
424         CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
425             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
426         EXECUTE_TEST(
427             error,
428             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
429     }
430     else
431     {
432         CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
433             TYPE_ATOMIC_INTPTR_T, useSVM);
434         EXECUTE_TEST(
435             error,
436             test_intptr_t.Execute(deviceID, context, queue, num_elements));
437         CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
438             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
439         EXECUTE_TEST(
440             error,
441             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
442         CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
443             TYPE_ATOMIC_SIZE_T, useSVM);
444         EXECUTE_TEST(
445             error, test_size_t.Execute(deviceID, context, queue, num_elements));
446         CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
447             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
448         EXECUTE_TEST(
449             error,
450             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
451     }
452     return error;
453 }
454 
test_atomic_load(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)455 int test_atomic_load(cl_device_id deviceID, cl_context context,
456                      cl_command_queue queue, int num_elements)
457 {
458     return test_atomic_load_generic(deviceID, context, queue, num_elements,
459                                     false);
460 }
461 
test_svm_atomic_load(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)462 int test_svm_atomic_load(cl_device_id deviceID, cl_context context,
463                          cl_command_queue queue, int num_elements)
464 {
465     return test_atomic_load_generic(deviceID, context, queue, num_elements,
466                                     true);
467 }
468 
469 template <typename HostAtomicType, typename HostDataType>
470 class CBasicTestExchange
471     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
472 public:
473     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
474     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
475     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
476     using CBasicTestMemOrderScope<HostAtomicType,
477                                   HostDataType>::MemoryOrderScopeStr;
478     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
479     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestExchange(TExplicitAtomicType dataType,bool useSVM)480     CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
481         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
482                                                                 useSVM)
483     {
484         StartValue(123456);
485     }
ProgramCore()486     virtual std::string ProgramCore()
487     {
488         std::string memoryOrderScope = MemoryOrderScopeStr();
489         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
490         return "  oldValues[tid] = atomic_exchange" + postfix
491             + "(&destMemory[0], tid" + memoryOrderScope
492             + ");\n"
493               "  for(int i = 0; i < "
494             + IterationsStr()
495             + "; i++)\n"
496               "    oldValues[tid] = atomic_exchange"
497             + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
498             + ");\n";
499     }
500 
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)501     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
502                               volatile HostAtomicType *destMemory,
503                               HostDataType *oldValues)
504     {
505         oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid,
506                                               MemoryOrder());
507         for (int i = 0; i < Iterations(); i++)
508             oldValues[tid] = host_atomic_exchange(
509                 &destMemory[0], oldValues[tid], MemoryOrder());
510     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)511     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
512                             HostDataType *refValues,
513                             HostAtomicType *finalValues)
514     {
515         OldValueCheck(
516             Iterations() % 2
517             == 0); // check is valid for even number of iterations only
518         correct = true;
519         /* We are expecting values from 0 to size-1 and initial value from
520          * atomic variable */
521         /* These values must be distributed across refValues array and atomic
522          * variable finalVaue[0] */
523         /* Any repeated value is treated as an error */
524         std::vector<bool> tidFound(threadCount);
525         bool startValueFound = false;
526         cl_uint i;
527 
528         for (i = 0; i <= threadCount; i++)
529         {
530             cl_uint value;
531             if (i == threadCount)
532                 value = (cl_uint)finalValues[0]; // additional value from atomic
533                                                  // variable (last written)
534             else
535                 value = (cl_uint)refValues[i];
536             if (value == (cl_uint)StartValue())
537             {
538                 // Special initial value
539                 if (startValueFound)
540                 {
541                     log_error("ERROR: Starting reference value (%u) occurred "
542                               "more thane once\n",
543                               (cl_uint)StartValue());
544                     correct = false;
545                     return true;
546                 }
547                 startValueFound = true;
548                 continue;
549             }
550             if (value >= threadCount)
551             {
552                 log_error(
553                     "ERROR: Reference value %u outside of valid range! (%u)\n",
554                     i, value);
555                 correct = false;
556                 return true;
557             }
558             if (tidFound[value])
559             {
560                 log_error("ERROR: Value (%u) occurred more thane once\n",
561                           value);
562                 correct = false;
563                 return true;
564             }
565             tidFound[value] = true;
566         }
567         return true;
568     }
569 };
570 
test_atomic_exchange_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)571 int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context,
572                                  cl_command_queue queue, int num_elements,
573                                  bool useSVM)
574 {
575     int error = 0;
576     CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
577                                                            useSVM);
578     EXECUTE_TEST(error,
579                  test_int.Execute(deviceID, context, queue, num_elements));
580     CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
581                                                               useSVM);
582     EXECUTE_TEST(error,
583                  test_uint.Execute(deviceID, context, queue, num_elements));
584     CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
585                                                               useSVM);
586     EXECUTE_TEST(error,
587                  test_long.Execute(deviceID, context, queue, num_elements));
588     CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
589         TYPE_ATOMIC_ULONG, useSVM);
590     EXECUTE_TEST(error,
591                  test_ulong.Execute(deviceID, context, queue, num_elements));
592     CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
593         TYPE_ATOMIC_FLOAT, useSVM);
594     EXECUTE_TEST(error,
595                  test_float.Execute(deviceID, context, queue, num_elements));
596     CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
597         TYPE_ATOMIC_DOUBLE, useSVM);
598     EXECUTE_TEST(error,
599                  test_double.Execute(deviceID, context, queue, num_elements));
600     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
601     {
602         CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
603             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
604         EXECUTE_TEST(
605             error,
606             test_intptr_t.Execute(deviceID, context, queue, num_elements));
607         CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
608             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
609         EXECUTE_TEST(
610             error,
611             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
612         CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
613             TYPE_ATOMIC_SIZE_T, useSVM);
614         EXECUTE_TEST(
615             error, test_size_t.Execute(deviceID, context, queue, num_elements));
616         CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
617             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
618         EXECUTE_TEST(
619             error,
620             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
621     }
622     else
623     {
624         CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
625             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
626         EXECUTE_TEST(
627             error,
628             test_intptr_t.Execute(deviceID, context, queue, num_elements));
629         CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
630             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
631         EXECUTE_TEST(
632             error,
633             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
634         CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
635             TYPE_ATOMIC_SIZE_T, useSVM);
636         EXECUTE_TEST(
637             error, test_size_t.Execute(deviceID, context, queue, num_elements));
638         CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
639             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
640         EXECUTE_TEST(
641             error,
642             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
643     }
644     return error;
645 }
646 
test_atomic_exchange(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)647 int test_atomic_exchange(cl_device_id deviceID, cl_context context,
648                          cl_command_queue queue, int num_elements)
649 {
650     return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
651                                         false);
652 }
653 
test_svm_atomic_exchange(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)654 int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context,
655                              cl_command_queue queue, int num_elements)
656 {
657     return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
658                                         true);
659 }
660 
661 template <typename HostAtomicType, typename HostDataType>
662 class CBasicTestCompareStrong
663     : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
664 public:
665     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
666     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
667     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
668     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
669     using CBasicTestMemOrder2Scope<HostAtomicType,
670                                    HostDataType>::MemoryOrderScope;
671     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
672     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
673     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
674     using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
675     using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestCompareStrong(TExplicitAtomicType dataType,bool useSVM)676     CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
677         : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
678                                                                  useSVM)
679     {
680         StartValue(123456);
681         OldValueCheck(false);
682     }
ExecuteSingleTest(cl_device_id deviceID,cl_context context,cl_command_queue queue)683     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
684                                   cl_command_queue queue)
685     {
686         if (MemoryOrder2() == MEMORY_ORDER_RELEASE
687             || MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
688             return 0; // not allowed as 'failure' argument
689         if ((MemoryOrder() == MEMORY_ORDER_RELAXED
690              && MemoryOrder2() != MEMORY_ORDER_RELAXED)
691             || (MemoryOrder() != MEMORY_ORDER_SEQ_CST
692                 && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
693             return 0; // failure argument shall be no stronger than the success
694 
695         if (CheckCapabilities(MemoryScope(), MemoryOrder())
696             == TEST_SKIPPED_ITSELF)
697             return 0; // skip test - not applicable
698 
699         if (CheckCapabilities(MemoryScope(), MemoryOrder2())
700             == TEST_SKIPPED_ITSELF)
701             return 0; // skip test - not applicable
702 
703         return CBasicTestMemOrder2Scope<
704             HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
705                                                              queue);
706     }
ProgramCore()707     virtual std::string ProgramCore()
708     {
709         std::string memoryOrderScope = MemoryOrderScope();
710         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
711         return std::string("  ") + DataType().RegularTypeName()
712             + " expected, previous;\n"
713               "  int successCount = 0;\n"
714               "  oldValues[tid] = tid;\n"
715               "  expected = tid;  // force failure at the beginning\n"
716               "  if(atomic_compare_exchange_strong"
717             + postfix + "(&destMemory[0], &expected, oldValues[tid]"
718             + memoryOrderScope
719             + ") || expected == tid)\n"
720               "    oldValues[tid] = threadCount+1; //mark unexpected success "
721               "with invalid value\n"
722               "  else\n"
723               "  {\n"
724               "    for(int i = 0; i < "
725             + IterationsStr()
726             + " || successCount == 0; i++)\n"
727               "    {\n"
728               "      previous = expected;\n"
729               "      if(atomic_compare_exchange_strong"
730             + postfix + "(&destMemory[0], &expected, oldValues[tid]"
731             + memoryOrderScope
732             + "))\n"
733               "      {\n"
734               "        oldValues[tid] = expected;\n"
735               "        successCount++;\n"
736               "      }\n"
737               "      else\n"
738               "      {\n"
739               "        if(previous == expected) // spurious failure - "
740               "shouldn't occur for 'strong'\n"
741               "        {\n"
742               "          oldValues[tid] = threadCount; //mark fail with "
743               "invalid value\n"
744               "          break;\n"
745               "        }\n"
746               "      }\n"
747               "    }\n"
748               "  }\n";
749     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)750     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
751                               volatile HostAtomicType *destMemory,
752                               HostDataType *oldValues)
753     {
754         HostDataType expected = (HostDataType)StartValue(), previous;
755         oldValues[tid] = (HostDataType)tid;
756         for (int i = 0; i < Iterations(); i++)
757         {
758             previous = expected;
759             if (host_atomic_compare_exchange(&destMemory[0], &expected,
760                                              oldValues[tid], MemoryOrder(),
761                                              MemoryOrder2()))
762                 oldValues[tid] = expected;
763             else
764             {
765                 if (previous == expected) // shouldn't occur for 'strong'
766                 {
767                     oldValues[tid] = threadCount; // mark fail with invalid
768                                                   // value
769                 }
770             }
771         }
772     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)773     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
774                             HostDataType *refValues,
775                             HostAtomicType *finalValues)
776     {
777         correct = true;
778         /* We are expecting values from 0 to size-1 and initial value from
779          * atomic variable */
780         /* These values must be distributed across refValues array and atomic
781          * variable finalVaue[0] */
782         /* Any repeated value is treated as an error */
783         std::vector<bool> tidFound(threadCount);
784         bool startValueFound = false;
785         cl_uint i;
786 
787         for (i = 0; i <= threadCount; i++)
788         {
789             cl_uint value;
790             if (i == threadCount)
791                 value = (cl_uint)finalValues[0]; // additional value from atomic
792                                                  // variable (last written)
793             else
794                 value = (cl_uint)refValues[i];
795             if (value == (cl_uint)StartValue())
796             {
797                 // Special initial value
798                 if (startValueFound)
799                 {
800                     log_error("ERROR: Starting reference value (%u) occurred "
801                               "more thane once\n",
802                               (cl_uint)StartValue());
803                     correct = false;
804                     return true;
805                 }
806                 startValueFound = true;
807                 continue;
808             }
809             if (value >= threadCount)
810             {
811                 if (value == threadCount)
812                     log_error("ERROR: Spurious failure detected for "
813                               "atomic_compare_exchange_strong\n");
814                 log_error(
815                     "ERROR: Reference value %u outside of valid range! (%u)\n",
816                     i, value);
817                 correct = false;
818                 return true;
819             }
820             if (tidFound[value])
821             {
822                 log_error("ERROR: Value (%u) occurred more thane once\n",
823                           value);
824                 correct = false;
825                 return true;
826             }
827             tidFound[value] = true;
828         }
829         return true;
830     }
831 };
832 
// Instantiates CBasicTestCompareStrong for every required atomic type and
// runs each one, accumulating failures into 'error' via EXECUTE_TEST.
int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID,
                                                cl_context context,
                                                cl_command_queue queue,
                                                int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                                useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // Pointer-sized atomics: pick the 32- or 64-bit host mirror types based
    // on the device's size_t width.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
901 
test_atomic_compare_exchange_strong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)902 int test_atomic_compare_exchange_strong(cl_device_id deviceID,
903                                         cl_context context,
904                                         cl_command_queue queue,
905                                         int num_elements)
906 {
907     return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
908                                                        num_elements, false);
909 }
910 
test_svm_atomic_compare_exchange_strong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)911 int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID,
912                                             cl_context context,
913                                             cl_command_queue queue,
914                                             int num_elements)
915 {
916     return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
917                                                        num_elements, true);
918 }
919 
// Test for atomic_compare_exchange_weak. Reuses the host function, the
// memory-order filtering, and the verification from CBasicTestCompareStrong;
// only the generated kernel differs: it calls the 'weak' builtin and does
// NOT treat a spurious failure (previous == expected) as an error, since
// weak compare-exchange is allowed to fail spuriously.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak
    : public CBasicTestCompareStrong<HostAtomicType, HostDataType> {
public:
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestCompareStrong<HostAtomicType,
                                  HostDataType>::MemoryOrderScope;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
    // Builds the OpenCL C kernel body: loops until at least one successful
    // exchange; oldValues[tid] = threadCount+1 marks an impossible success
    // on the initial forced-failure attempt.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  ") + DataType().RegularTypeName()
            + " expected , previous;\n"
              "  int successCount = 0;\n"
              "  oldValues[tid] = tid;\n"
              "  expected = tid;  // force failure at the beginning\n"
              "  if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + ") || expected == tid)\n"
              "    oldValues[tid] = threadCount+1; //mark unexpected success "
              "with invalid value\n"
              "  else\n"
              "  {\n"
              "    for(int i = 0; i < "
            + IterationsStr()
            + " || successCount == 0; i++)\n"
              "    {\n"
              "      previous = expected;\n"
              "      if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + "))\n"
              "      {\n"
              "        oldValues[tid] = expected;\n"
              "        successCount++;\n"
              "      }\n"
              "    }\n"
              "  }\n";
    }
};
968 
// Instantiates CBasicTestCompareWeak for every required atomic type and
// runs each one, accumulating failures into 'error' via EXECUTE_TEST.
int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID,
                                              cl_context context,
                                              cl_command_queue queue,
                                              int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // Pointer-sized atomics: pick the 32- or 64-bit host mirror types based
    // on the device's size_t width.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
1037 
test_atomic_compare_exchange_weak(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1038 int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context,
1039                                       cl_command_queue queue, int num_elements)
1040 {
1041     return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
1042                                                      num_elements, false);
1043 }
1044 
test_svm_atomic_compare_exchange_weak(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1045 int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID,
1046                                           cl_context context,
1047                                           cl_command_queue queue,
1048                                           int num_elements)
1049 {
1050     return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
1051                                                      num_elements, true);
1052 }
1053 
// Test for atomic_fetch_add: each work-item performs four adds on the same
// destination — three of (tid + 3) and one of (tid + 3) shifted into the
// most significant byte (exercising carry across the full operand width).
// The host mirror performs identical operations so the final sum can be
// checked against ExpectedValue.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
    // Builds the OpenCL C kernel body; only the first fetch_add's return
    // value is recorded in oldValues[tid].
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_add" + postfix
            + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
            + ")tid + 3" + memoryOrderScope + ");\n" + "  atomic_fetch_add"
            + postfix + "(&destMemory[0], ("
            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
            + ");\n"
              "  atomic_fetch_add"
            + postfix + "(&destMemory[0], ("
            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
            + ");\n"
              "  atomic_fetch_add"
            + postfix + "(&destMemory[0], (("
            + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
            + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
            + ");\n";
    }
    // Host-side mirror of the kernel: same four adds with the same memory
    // order.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_add(
            &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                              MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                              MemoryOrder());
        host_atomic_fetch_add(&destMemory[0],
                              ((HostDataType)tid + 3)
                                  << (sizeof(HostDataType) - 1) * 8,
                              MemoryOrder());
    }
    // Closed-form expected final value: start value plus, per thread, three
    // plain adds and one shifted add of (i + 3).
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
            expected += ((HostDataType)i + 3) * 3
                + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        return true;
    }
};
1113 
// Instantiates CBasicTestFetchAdd for every required atomic type and runs
// each one, accumulating failures into 'error' via EXECUTE_TEST.
int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // Pointer-sized atomics: pick the 32- or 64-bit host mirror types based
    // on the device's size_t width.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
1181 
test_atomic_fetch_add(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1182 int test_atomic_fetch_add(cl_device_id deviceID, cl_context context,
1183                           cl_command_queue queue, int num_elements)
1184 {
1185     return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
1186                                          false);
1187 }
1188 
test_svm_atomic_fetch_add(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1189 int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context,
1190                               cl_command_queue queue, int num_elements)
1191 {
1192     return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
1193                                          true);
1194 }
1195 
// Test for atomic_fetch_sub: each work-item subtracts a single combined
// operand — (tid + 3) plus (tid + 3) shifted into the most significant byte
// (exercising borrow across the full operand width). The host mirror
// performs the identical subtraction so the final value can be checked
// against ExpectedValue.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
    // Builds the OpenCL C kernel body; the fetch_sub's return value is
    // recorded in oldValues[tid].
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_sub" + postfix
            + "(&destMemory[0], tid + 3 +((("
            + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
            + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope
            + ");\n";
    }
    // Host-side mirror of the kernel: same combined subtraction with the
    // same memory order.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_sub(
            &destMemory[0],
            (HostDataType)tid + 3
                + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
            MemoryOrder());
    }
    // Closed-form expected final value: start value minus each thread's
    // combined operand.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
            expected -= (HostDataType)i + 3
                + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        return true;
    }
};
1240 
test_atomic_fetch_sub_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)1241 int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context,
1242                                   cl_command_queue queue, int num_elements,
1243                                   bool useSVM)
1244 {
1245     int error = 0;
1246     CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1247                                                            useSVM);
1248     EXECUTE_TEST(error,
1249                  test_int.Execute(deviceID, context, queue, num_elements));
1250     CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1251                                                               useSVM);
1252     EXECUTE_TEST(error,
1253                  test_uint.Execute(deviceID, context, queue, num_elements));
1254     CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1255                                                               useSVM);
1256     EXECUTE_TEST(error,
1257                  test_long.Execute(deviceID, context, queue, num_elements));
1258     CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1259         TYPE_ATOMIC_ULONG, useSVM);
1260     EXECUTE_TEST(error,
1261                  test_ulong.Execute(deviceID, context, queue, num_elements));
1262     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1263     {
1264         CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1265             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1266         EXECUTE_TEST(
1267             error,
1268             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1269         CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1270             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1271         EXECUTE_TEST(
1272             error,
1273             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1274         CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1275             TYPE_ATOMIC_SIZE_T, useSVM);
1276         EXECUTE_TEST(
1277             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1278         CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1279             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1280         EXECUTE_TEST(
1281             error,
1282             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1283     }
1284     else
1285     {
1286         CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1287             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1288         EXECUTE_TEST(
1289             error,
1290             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1291         CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1292             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1293         EXECUTE_TEST(
1294             error,
1295             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1296         CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1297             TYPE_ATOMIC_SIZE_T, useSVM);
1298         EXECUTE_TEST(
1299             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1300         CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1301             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1302         EXECUTE_TEST(
1303             error,
1304             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1305     }
1306     return error;
1307 }
1308 
test_atomic_fetch_sub(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1309 int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
1310                           cl_command_queue queue, int num_elements)
1311 {
1312     return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
1313                                          false);
1314 }
1315 
test_svm_atomic_fetch_sub(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1316 int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
1317                               cl_command_queue queue, int num_elements)
1318 {
1319     return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
1320                                          true);
1321 }
1322 
// Tests atomic_fetch_or[_explicit]: threads are partitioned across several
// destination words (one word per sizeof(type)*8 threads) and each thread
// ORs in exactly one distinct bit of its word, so every bit ends up set.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // All destination bits start clear; threads set them one by one.
        StartValue(0);
    }
    // One destination word per group of (bits-in-type) threads, rounded up.
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    // Kernel source: thread tid sets bit (tid % numBits) of word
    // (tid / numBits). Uses the _explicit form when an order/scope is set.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("    size_t numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              "    int whichResult = tid / numBits;\n"
              "    int bitIndex = tid - (whichResult * numBits);\n"
              "\n"
              "    oldValues[tid] = atomic_fetch_or"
            + postfix + "(&destMemory[whichResult], (("
            + DataType().RegularTypeName() + ")1 << bitIndex) "
            + memoryOrderScope + ");\n";
    }
    // Host-side mirror of the kernel: same word/bit selection, same OR.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] =
            host_atomic_fetch_or(&destMemory[whichResult],
                                 ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    // Every fully-populated word must be all-ones; the last word only has
    // the low (threadCount mod width) bits set.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
            / (sizeof(HostDataType) * 8);
        if (whichDestValue < numValues - 1)
        {
            expected = ~(HostDataType)0;
            return true;
        }
        // Last item doesn't get or'ed on every bit, so we have to mask away
        cl_uint numBits =
            threadCount - whichDestValue * (sizeof(HostDataType) * 8);
        expected = StartValue();
        for (cl_uint i = 0; i < numBits; i++)
            expected |= ((HostDataType)1 << i);
        return true;
    }
};
1391 
test_atomic_fetch_or_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)1392 int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context,
1393                                  cl_command_queue queue, int num_elements,
1394                                  bool useSVM)
1395 {
1396     int error = 0;
1397     CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1398                                                           useSVM);
1399     EXECUTE_TEST(error,
1400                  test_int.Execute(deviceID, context, queue, num_elements));
1401     CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1402                                                              useSVM);
1403     EXECUTE_TEST(error,
1404                  test_uint.Execute(deviceID, context, queue, num_elements));
1405     CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1406                                                              useSVM);
1407     EXECUTE_TEST(error,
1408                  test_long.Execute(deviceID, context, queue, num_elements));
1409     CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1410         TYPE_ATOMIC_ULONG, useSVM);
1411     EXECUTE_TEST(error,
1412                  test_ulong.Execute(deviceID, context, queue, num_elements));
1413     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1414     {
1415         CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1416             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1417         EXECUTE_TEST(
1418             error,
1419             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1420         CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1421             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1422         EXECUTE_TEST(
1423             error,
1424             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1425         CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1426             TYPE_ATOMIC_SIZE_T, useSVM);
1427         EXECUTE_TEST(
1428             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1429         CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1430             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1431         EXECUTE_TEST(
1432             error,
1433             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1434     }
1435     else
1436     {
1437         CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1438             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1439         EXECUTE_TEST(
1440             error,
1441             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1442         CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1443             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1444         EXECUTE_TEST(
1445             error,
1446             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1447         CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1448             TYPE_ATOMIC_SIZE_T, useSVM);
1449         EXECUTE_TEST(
1450             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1451         CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1452             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1453         EXECUTE_TEST(
1454             error,
1455             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1456     }
1457     return error;
1458 }
1459 
test_atomic_fetch_or(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1460 int test_atomic_fetch_or(cl_device_id deviceID, cl_context context,
1461                          cl_command_queue queue, int num_elements)
1462 {
1463     return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
1464                                         false);
1465 }
1466 
test_svm_atomic_fetch_or(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1467 int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context,
1468                              cl_command_queue queue, int num_elements)
1469 {
1470     return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
1471                                         true);
1472 }
1473 
// Tests atomic_fetch_xor[_explicit]: all threads XOR single bits into one
// shared destination word starting from a fixed bit pattern. Several threads
// may target the same bit index; correctness only requires that the host
// replay of the identical XOR sequence yields the same final value.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // Arbitrary non-trivial start pattern (truncated to narrow types) so
        // XORs flip a mix of set and clear bits.
        StartValue((HostDataType)0x2f08ab418ba0541LL);
    }
    // Kernel source: thread tid flips bit (numBits-1)*(tid+1)/threadCount of
    // the single destination word. Uses _explicit when an order/scope is set.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  int numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              "  int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
              "\n"
              "  oldValues[tid] = atomic_fetch_xor"
            + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName()
            + ")1 << bitIndex) " + memoryOrderScope + ");\n";
    }
    // Host-side mirror of the kernel: same bit-index formula, same XOR.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        int numBits = sizeof(HostDataType) * 8;
        int bitIndex = (numBits - 1) * (tid + 1) / threadCount;

        oldValues[tid] = host_atomic_fetch_xor(
            &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    // Expected final value: start value with every thread's XOR replayed
    // (bits hit an even number of times cancel out).
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        int numBits = sizeof(HostDataType) * 8;
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            int bitIndex = (numBits - 1) * (i + 1) / threadCount;
            expected ^= ((HostDataType)1 << bitIndex);
        }
        return true;
    }
};
1526 
test_atomic_fetch_xor_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)1527 int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context,
1528                                   cl_command_queue queue, int num_elements,
1529                                   bool useSVM)
1530 {
1531     int error = 0;
1532     CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1533                                                            useSVM);
1534     EXECUTE_TEST(error,
1535                  test_int.Execute(deviceID, context, queue, num_elements));
1536     CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1537                                                               useSVM);
1538     EXECUTE_TEST(error,
1539                  test_uint.Execute(deviceID, context, queue, num_elements));
1540     CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1541                                                               useSVM);
1542     EXECUTE_TEST(error,
1543                  test_long.Execute(deviceID, context, queue, num_elements));
1544     CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1545         TYPE_ATOMIC_ULONG, useSVM);
1546     EXECUTE_TEST(error,
1547                  test_ulong.Execute(deviceID, context, queue, num_elements));
1548     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1549     {
1550         CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1551             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1552         EXECUTE_TEST(
1553             error,
1554             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1555         CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1556             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1557         EXECUTE_TEST(
1558             error,
1559             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1560         CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1561             TYPE_ATOMIC_SIZE_T, useSVM);
1562         EXECUTE_TEST(
1563             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1564         CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1565             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1566         EXECUTE_TEST(
1567             error,
1568             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1569     }
1570     else
1571     {
1572         CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1573             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1574         EXECUTE_TEST(
1575             error,
1576             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1577         CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1578             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1579         EXECUTE_TEST(
1580             error,
1581             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1582         CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1583             TYPE_ATOMIC_SIZE_T, useSVM);
1584         EXECUTE_TEST(
1585             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1586         CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1587             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1588         EXECUTE_TEST(
1589             error,
1590             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1591     }
1592     return error;
1593 }
1594 
test_atomic_fetch_xor(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1595 int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
1596                           cl_command_queue queue, int num_elements)
1597 {
1598     return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
1599                                          false);
1600 }
1601 
test_svm_atomic_fetch_xor(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1602 int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
1603                               cl_command_queue queue, int num_elements)
1604 {
1605     return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
1606                                          true);
1607 }
1608 
// Tests atomic_fetch_and[_explicit]: the mirror image of the fetch_or test.
// Destinations start all-ones and each thread ANDs away exactly one distinct
// bit of its word, so every bit ends up cleared.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAnd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // All destination bits start set; threads clear them one by one.
        StartValue(~(HostDataType)0);
    }
    // One destination word per group of (bits-in-type) threads, rounded up.
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    // Kernel source: thread tid clears bit (tid % numBits) of word
    // (tid / numBits) by ANDing with the inverted mask. Uses the _explicit
    // form when an order/scope is set.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  size_t numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              "  int whichResult = tid / numBits;\n"
              "  int bitIndex = tid - (whichResult * numBits);\n"
              "\n"
              "  oldValues[tid] = atomic_fetch_and"
            + postfix + "(&destMemory[whichResult], ~(("
            + DataType().RegularTypeName() + ")1 << bitIndex) "
            + memoryOrderScope + ");\n";
    }
    // Host-side mirror of the kernel: same word/bit selection, same AND.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult],
                                               ~((HostDataType)1 << bitIndex),
                                               MemoryOrder());
    }
    // Every fully-populated word must be zero; the last word only has its
    // low (threadCount mod width) bits cleared.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
            / (sizeof(HostDataType) * 8);
        if (whichDestValue < numValues - 1)
        {
            expected = 0;
            return true;
        }
        // Last item doesn't get and'ed on every bit, so we have to mask away
        size_t numBits =
            threadCount - whichDestValue * (sizeof(HostDataType) * 8);
        expected = StartValue();
        for (size_t i = 0; i < numBits; i++)
            expected &= ~((HostDataType)1 << i);
        return true;
    }
};
1677 
test_atomic_fetch_and_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)1678 int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context,
1679                                   cl_command_queue queue, int num_elements,
1680                                   bool useSVM)
1681 {
1682     int error = 0;
1683     CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1684                                                            useSVM);
1685     EXECUTE_TEST(error,
1686                  test_int.Execute(deviceID, context, queue, num_elements));
1687     CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1688                                                               useSVM);
1689     EXECUTE_TEST(error,
1690                  test_uint.Execute(deviceID, context, queue, num_elements));
1691     CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1692                                                               useSVM);
1693     EXECUTE_TEST(error,
1694                  test_long.Execute(deviceID, context, queue, num_elements));
1695     CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1696         TYPE_ATOMIC_ULONG, useSVM);
1697     EXECUTE_TEST(error,
1698                  test_ulong.Execute(deviceID, context, queue, num_elements));
1699     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1700     {
1701         CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1702             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1703         EXECUTE_TEST(
1704             error,
1705             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1706         CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1707             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1708         EXECUTE_TEST(
1709             error,
1710             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1711         CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1712             TYPE_ATOMIC_SIZE_T, useSVM);
1713         EXECUTE_TEST(
1714             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1715         CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1716             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1717         EXECUTE_TEST(
1718             error,
1719             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1720     }
1721     else
1722     {
1723         CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1724             test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1725         EXECUTE_TEST(
1726             error,
1727             test_intptr_t.Execute(deviceID, context, queue, num_elements));
1728         CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1729             test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1730         EXECUTE_TEST(
1731             error,
1732             test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1733         CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1734             TYPE_ATOMIC_SIZE_T, useSVM);
1735         EXECUTE_TEST(
1736             error, test_size_t.Execute(deviceID, context, queue, num_elements));
1737         CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1738             test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1739         EXECUTE_TEST(
1740             error,
1741             test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1742     }
1743     return error;
1744 }
1745 
test_atomic_fetch_and(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1746 int test_atomic_fetch_and(cl_device_id deviceID, cl_context context,
1747                           cl_command_queue queue, int num_elements)
1748 {
1749     return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
1750                                          false);
1751 }
1752 
test_svm_atomic_fetch_and(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1753 int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context,
1754                               cl_command_queue queue, int num_elements)
1755 {
1756     return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
1757                                          true);
1758 }
1759 
1760 template <typename HostAtomicType, typename HostDataType>
1761 class CBasicTestFetchOrAnd
1762     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1763 public:
1764     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1765     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1766     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1767     using CBasicTestMemOrderScope<HostAtomicType,
1768                                   HostDataType>::MemoryOrderScopeStr;
1769     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1770     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchOrAnd(TExplicitAtomicType dataType,bool useSVM)1771     CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM)
1772         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1773                                                                 useSVM)
1774     {
1775         StartValue(0);
1776     }
NumResults(cl_uint threadCount,cl_device_id deviceID)1777     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1778     {
1779         return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
1780     }
1781     // each thread modifies (with OR and AND operations) and verifies
1782     // only one bit in atomic variable
1783     // other bits are modified by other threads but it must not affect current
1784     // thread operation
ProgramCore()1785     virtual std::string ProgramCore()
1786     {
1787         std::string memoryOrderScope = MemoryOrderScopeStr();
1788         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1789         return std::string("  int bits = sizeof(")
1790             + DataType().RegularTypeName() + ")*8;\n"
1791             + "  size_t valueInd = tid/bits;\n"
1792               "  "
1793             + DataType().RegularTypeName() + " value, bitMask = ("
1794             + DataType().RegularTypeName()
1795             + ")1 << tid%bits;\n"
1796               "  oldValues[tid] = 0;\n"
1797               "  for(int i = 0; i < "
1798             + IterationsStr()
1799             + "; i++)\n"
1800               "  {\n"
1801               "    value = atomic_fetch_or"
1802             + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1803             + ");\n"
1804               "    if(value & bitMask) // bit should be set to 0\n"
1805               "      oldValues[tid]++;\n"
1806               "    value = atomic_fetch_and"
1807             + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope
1808             + ");\n"
1809               "    if(!(value & bitMask)) // bit should be set to 1\n"
1810               "      oldValues[tid]++;\n"
1811               "  }\n";
1812     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)1813     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1814                               volatile HostAtomicType *destMemory,
1815                               HostDataType *oldValues)
1816     {
1817         int bits = sizeof(HostDataType) * 8;
1818         size_t valueInd = tid / bits;
1819         HostDataType value, bitMask = (HostDataType)1 << tid % bits;
1820         oldValues[tid] = 0;
1821         for (int i = 0; i < Iterations(); i++)
1822         {
1823             value = host_atomic_fetch_or(destMemory + valueInd, bitMask,
1824                                          MemoryOrder());
1825             if (value & bitMask) // bit should be set to 0
1826                 oldValues[tid]++;
1827             value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask,
1828                                           MemoryOrder());
1829             if (!(value & bitMask)) // bit should be set to 1
1830                 oldValues[tid]++;
1831         }
1832     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)1833     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
1834                                HostDataType *startRefValues,
1835                                cl_uint whichDestValue)
1836     {
1837         expected = 0;
1838         return true;
1839     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)1840     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
1841                             HostDataType *refValues,
1842                             HostAtomicType *finalValues)
1843     {
1844         correct = true;
1845         for (cl_uint i = 0; i < threadCount; i++)
1846         {
1847             if (refValues[i] > 0)
1848             {
1849                 log_error("Thread %d found %d mismatch(es)\n", i,
1850                           (cl_uint)refValues[i]);
1851                 correct = false;
1852             }
1853         }
1854         return true;
1855     }
1856 };
1857 
// Runs the fetch_or/fetch_and bit-ownership test over all required atomic
// integer types. Pointer-sized types (intptr_t, uintptr_t, size_t,
// ptrdiff_t) are dispatched on the device's size_t width so the host
// mirror type matches the device-side representation.
// useSVM selects shared-virtual-memory storage for the atomics.
// Returns 0 on success; EXECUTE_TEST accumulates failures into 'error'.
int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // 32-bit size_t device: use the 32-bit host mirrors.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        // 64-bit size_t device: use the 64-bit host mirrors.
        CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
1925 
test_atomic_fetch_orand(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1926 int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
1927                             cl_command_queue queue, int num_elements)
1928 {
1929     return test_atomic_fetch_orand_generic(deviceID, context, queue,
1930                                            num_elements, false);
1931 }
1932 
test_svm_atomic_fetch_orand(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1933 int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
1934                                 cl_command_queue queue, int num_elements)
1935 {
1936     return test_atomic_fetch_orand_generic(deviceID, context, queue,
1937                                            num_elements, true);
1938 }
1939 
1940 template <typename HostAtomicType, typename HostDataType>
1941 class CBasicTestFetchXor2
1942     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1943 public:
1944     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1945     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1946     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1947     using CBasicTestMemOrderScope<HostAtomicType,
1948                                   HostDataType>::MemoryOrderScopeStr;
1949     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1950     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchXor2(TExplicitAtomicType dataType,bool useSVM)1951     CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM)
1952         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1953                                                                 useSVM)
1954     {
1955         StartValue(0);
1956     }
NumResults(cl_uint threadCount,cl_device_id deviceID)1957     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1958     {
1959         return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
1960     }
1961     // each thread modifies (with XOR operation) and verifies
1962     // only one bit in atomic variable
1963     // other bits are modified by other threads but it must not affect current
1964     // thread operation
ProgramCore()1965     virtual std::string ProgramCore()
1966     {
1967         std::string memoryOrderScope = MemoryOrderScopeStr();
1968         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1969         return std::string("  int bits = sizeof(")
1970             + DataType().RegularTypeName() + ")*8;\n"
1971             + "  size_t valueInd = tid/bits;\n"
1972               "  "
1973             + DataType().RegularTypeName() + " value, bitMask = ("
1974             + DataType().RegularTypeName()
1975             + ")1 << tid%bits;\n"
1976               "  oldValues[tid] = 0;\n"
1977               "  for(int i = 0; i < "
1978             + IterationsStr()
1979             + "; i++)\n"
1980               "  {\n"
1981               "    value = atomic_fetch_xor"
1982             + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1983             + ");\n"
1984               "    if(value & bitMask) // bit should be set to 0\n"
1985               "      oldValues[tid]++;\n"
1986               "    value = atomic_fetch_xor"
1987             + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1988             + ");\n"
1989               "    if(!(value & bitMask)) // bit should be set to 1\n"
1990               "      oldValues[tid]++;\n"
1991               "  }\n";
1992     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)1993     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1994                               volatile HostAtomicType *destMemory,
1995                               HostDataType *oldValues)
1996     {
1997         int bits = sizeof(HostDataType) * 8;
1998         size_t valueInd = tid / bits;
1999         HostDataType value, bitMask = (HostDataType)1 << tid % bits;
2000         oldValues[tid] = 0;
2001         for (int i = 0; i < Iterations(); i++)
2002         {
2003             value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
2004                                           MemoryOrder());
2005             if (value & bitMask) // bit should be set to 0
2006                 oldValues[tid]++;
2007             value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
2008                                           MemoryOrder());
2009             if (!(value & bitMask)) // bit should be set to 1
2010                 oldValues[tid]++;
2011         }
2012     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)2013     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2014                                HostDataType *startRefValues,
2015                                cl_uint whichDestValue)
2016     {
2017         expected = 0;
2018         return true;
2019     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)2020     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
2021                             HostDataType *refValues,
2022                             HostAtomicType *finalValues)
2023     {
2024         correct = true;
2025         for (cl_uint i = 0; i < threadCount; i++)
2026         {
2027             if (refValues[i] > 0)
2028             {
2029                 log_error("Thread %d found %d mismatches\n", i,
2030                           (cl_uint)refValues[i]);
2031                 correct = false;
2032             }
2033         }
2034         return true;
2035     }
2036 };
2037 
// Runs the double-XOR bit-ownership test over all required atomic integer
// types. Pointer-sized types are dispatched on the device's size_t width
// so the host mirror type matches the device-side representation.
// useSVM selects shared-virtual-memory storage for the atomics.
// Returns 0 on success; EXECUTE_TEST accumulates failures into 'error'.
int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue, int num_elements,
                                   bool useSVM)
{
    int error = 0;
    CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                            useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                               useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                               useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // 32-bit size_t device: use the 32-bit host mirrors.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        // 64-bit size_t device: use the 64-bit host mirrors.
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
2105 
test_atomic_fetch_xor2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2106 int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
2107                            cl_command_queue queue, int num_elements)
2108 {
2109     return test_atomic_fetch_xor2_generic(deviceID, context, queue,
2110                                           num_elements, false);
2111 }
2112 
test_svm_atomic_fetch_xor2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2113 int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
2114                                cl_command_queue queue, int num_elements)
2115 {
2116     return test_atomic_fetch_xor2_generic(deviceID, context, queue,
2117                                           num_elements, true);
2118 }
2119 
// Tests atomic_fetch_min: every thread offers a random value to a single
// shared atomic that starts at the type's maximum; the final value must be
// the minimum of all offered values and the start value.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // Start at the maximum so every generated value can lower it.
        StartValue(DataType().MaxValue());
    }
    // Kernel: each thread performs one atomic_fetch_min with its own
    // startRefValues entry, recording the fetched previous value.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_min" + postfix
            + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
    }
    // Host-side reference: same single fetch_min per thread.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                               MemoryOrder());
    }
    // Fills startRefValues with random candidates.
    // NOTE(review): for 8-byte types the second 32-bit draw is shifted by
    // only 16, so just the low 48 bits are randomized — presumably
    // intentional range limiting; confirm against upstream history.
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount; i++)
        {
            startRefValues[i] = genrand_int32(d);
            if (sizeof(HostDataType) >= 8)
                startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
        }
        return true;
    }
    // Expected final value: minimum of the start value and all candidates.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (startRefValues[i] < expected) expected = startRefValues[i];
        }
        return true;
    }
};
2172 
// Runs the atomic_fetch_min test over all required atomic integer types.
// Pointer-sized types are dispatched on the device's size_t width so the
// host mirror type matches the device-side representation.
// useSVM selects shared-virtual-memory storage for the atomics.
// Returns 0 on success; EXECUTE_TEST accumulates failures into 'error'.
int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // 32-bit size_t device: use the 32-bit host mirrors.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        // 64-bit size_t device: use the 64-bit host mirrors.
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
2240 
test_atomic_fetch_min(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2241 int test_atomic_fetch_min(cl_device_id deviceID, cl_context context,
2242                           cl_command_queue queue, int num_elements)
2243 {
2244     return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
2245                                          false);
2246 }
2247 
test_svm_atomic_fetch_min(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2248 int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context,
2249                               cl_command_queue queue, int num_elements)
2250 {
2251     return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
2252                                          true);
2253 }
2254 
// Tests atomic_fetch_max: every thread offers a random value to a single
// shared atomic that starts at the type's minimum; the final value must be
// the maximum of all offered values and the start value.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // Start at the minimum so every generated value can raise it.
        StartValue(DataType().MinValue());
    }
    // Kernel: each thread performs one atomic_fetch_max with its own
    // startRefValues entry, recording the fetched previous value.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_max" + postfix
            + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
    }
    // Host-side reference: same single fetch_max per thread.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                               MemoryOrder());
    }
    // Fills startRefValues with random candidates.
    // NOTE(review): for 8-byte types the second 32-bit draw is shifted by
    // only 16, so just the low 48 bits are randomized — presumably
    // intentional range limiting; confirm against upstream history.
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount; i++)
        {
            startRefValues[i] = genrand_int32(d);
            if (sizeof(HostDataType) >= 8)
                startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
        }
        return true;
    }
    // Expected final value: maximum of the start value and all candidates.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (startRefValues[i] > expected) expected = startRefValues[i];
        }
        return true;
    }
};
2307 
// Runs the atomic_fetch_max test over all required atomic integer types.
// Pointer-sized types are dispatched on the device's size_t width so the
// host mirror type matches the device-side representation.
// useSVM selects shared-virtual-memory storage for the atomics.
// Returns 0 on success; EXECUTE_TEST accumulates failures into 'error'.
int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // 32-bit size_t device: use the 32-bit host mirrors.
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        // 64-bit size_t device: use the 64-bit host mirrors.
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
2375 
test_atomic_fetch_max(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2376 int test_atomic_fetch_max(cl_device_id deviceID, cl_context context,
2377                           cl_command_queue queue, int num_elements)
2378 {
2379     return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
2380                                          false);
2381 }
2382 
test_svm_atomic_fetch_max(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2383 int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context,
2384                               cl_command_queue queue, int num_elements)
2385 {
2386     return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
2387                                          true);
2388 }
2389 
2390 template <typename HostAtomicType, typename HostDataType>
2391 class CBasicTestFlag
2392     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
2393     static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
2394 
2395 public:
2396     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
2397     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
2398     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
2399     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
2400     using CBasicTestMemOrderScope<HostAtomicType,
2401                                   HostDataType>::MemoryOrderScopeStr;
2402     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
2403     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    // Flag starts cleared (0); the framework's old-value check is disabled
    // because this test verifies ordering via per-thread results instead.
    CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
NumResults(cl_uint threadCount,cl_device_id deviceID)2411     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
2412     {
2413         return threadCount;
2414     }
MemoryOrderForClear()2415     TExplicitMemoryOrderType MemoryOrderForClear()
2416     {
2417         // Memory ordering for atomic_flag_clear function
2418         // ("shall not be memory_order_acquire nor memory_order_acq_rel")
2419         if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
2420         if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
2421         return MemoryOrder();
2422     }
MemoryOrderScopeStrForClear()2423     std::string MemoryOrderScopeStrForClear()
2424     {
2425         std::string orderStr;
2426         if (MemoryOrder() != MEMORY_ORDER_EMPTY)
2427             orderStr = std::string(", ")
2428                 + get_memory_order_type_name(MemoryOrderForClear());
2429         return orderStr + MemoryScopeStr();
2430     }
2431 
ExecuteSingleTest(cl_device_id deviceID,cl_context context,cl_command_queue queue)2432     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
2433                                   cl_command_queue queue)
2434     {
2435         // This test assumes support for the memory_scope_device scope in the
2436         // case that LocalMemory() == false. Therefore we should skip this test
2437         // in that configuration on a 3.0 driver since supporting the
2438         // memory_scope_device scope is optionaly.
2439         if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
2440         {
2441             if (!LocalMemory()
2442                 && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
2443             {
2444                 log_info("Skipping atomic_flag test due to use of "
2445                          "atomic_scope_device "
2446                          "which is optionally not supported on this device\n");
2447                 return 0; // skip test - not applicable
2448             }
2449         }
2450         return CBasicTestMemOrderScope<
2451             HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
2452                                                              queue);
2453     }
ProgramCore()2454     virtual std::string ProgramCore()
2455     {
2456         std::string memoryOrderScope = MemoryOrderScopeStr();
2457         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
2458         std::string program =
2459             "  uint cnt, stop = 0;\n"
2460             "  for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread "
2461             "must find critical section where it is the first visitor\n"
2462             "  {\n"
2463             "    bool set = atomic_flag_test_and_set"
2464             + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
2465         if (MemoryOrder() == MEMORY_ORDER_RELAXED
2466             || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory())
2467             program += "    atomic_work_item_fence("
2468                 + std::string(
2469                            LocalMemory()
2470                                ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
2471                                : "CLK_GLOBAL_MEM_FENCE, ")
2472                 + "memory_order_acquire,"
2473                 + std::string(LocalMemory()
2474                                   ? "memory_scope_work_group"
2475                                   : (UseSVM() ? "memory_scope_all_svm_devices"
2476                                               : "memory_scope_device"))
2477                 + ");\n";
2478 
2479         program += "    if (!set)\n"
2480                    "    {\n";
2481 
2482         if (LocalMemory())
2483             program += "      uint csIndex = "
2484                        "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
2485         else
2486             program += "      uint csIndex = cnt;\n";
2487 
2488         std::ostringstream csNotVisited;
2489         csNotVisited << CRITICAL_SECTION_NOT_VISITED;
2490         program += "      // verify that thread is the first visitor\n"
2491                    "      if(oldValues[csIndex] == "
2492             + csNotVisited.str()
2493             + ")\n"
2494               "      {\n"
2495               "        oldValues[csIndex] = tid; // set the winner id for this "
2496               "critical section\n"
2497               "        stop = 1;\n"
2498               "      }\n";
2499 
2500         if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
2501             || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory())
2502             program += "      atomic_work_item_fence("
2503                 + std::string(
2504                            LocalMemory()
2505                                ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
2506                                : "CLK_GLOBAL_MEM_FENCE, ")
2507                 + "memory_order_release,"
2508                 + std::string(LocalMemory()
2509                                   ? "memory_scope_work_group"
2510                                   : (UseSVM() ? "memory_scope_all_svm_devices"
2511                                               : "memory_scope_device"))
2512                 + ");\n";
2513 
2514         program += "      atomic_flag_clear" + postfix + "(&destMemory[cnt]"
2515             + MemoryOrderScopeStrForClear()
2516             + ");\n"
2517               "    }\n"
2518               "  }\n";
2519         return program;
2520     }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)2521     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
2522                               volatile HostAtomicType *destMemory,
2523                               HostDataType *oldValues)
2524     {
2525         cl_uint cnt, stop = 0;
2526         for (cnt = 0; !stop && cnt < threadCount;
2527              cnt++) // each thread must find critical section where it is the
2528                     // first visitor\n"
2529         {
2530             if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
2531             {
2532                 cl_uint csIndex = cnt;
2533                 // verify that thread is the first visitor\n"
2534                 if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
2535                 {
2536                     oldValues[csIndex] =
2537                         tid; // set the winner id for this critical section\n"
2538                     stop = 1;
2539                 }
2540                 host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
2541             }
2542         }
2543     }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)2544     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2545                                HostDataType *startRefValues,
2546                                cl_uint whichDestValue)
2547     {
2548         expected = StartValue();
2549         return true;
2550     }
GenerateRefs(cl_uint threadCount,HostDataType * startRefValues,MTdata d)2551     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
2552                               MTdata d)
2553     {
2554         for (cl_uint i = 0; i < threadCount; i++)
2555             startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
2556         return true;
2557     }
VerifyRefs(bool & correct,cl_uint threadCount,HostDataType * refValues,HostAtomicType * finalValues)2558     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
2559                             HostDataType *refValues,
2560                             HostAtomicType *finalValues)
2561     {
2562         correct = true;
2563         /* We are expecting unique values from 0 to threadCount-1 (each critical
2564          * section must be visited) */
2565         /* These values must be distributed across refValues array */
2566         std::vector<bool> tidFound(threadCount);
2567         cl_uint i;
2568 
2569         for (i = 0; i < threadCount; i++)
2570         {
2571             cl_uint value = (cl_uint)refValues[i];
2572             if (value == CRITICAL_SECTION_NOT_VISITED)
2573             {
2574                 // Special initial value
2575                 log_error("ERROR: Critical section %u not visited\n", i);
2576                 correct = false;
2577                 return true;
2578             }
2579             if (value >= threadCount)
2580             {
2581                 log_error(
2582                     "ERROR: Reference value %u outside of valid range! (%u)\n",
2583                     i, value);
2584                 correct = false;
2585                 return true;
2586             }
2587             if (tidFound[value])
2588             {
2589                 log_error("ERROR: Value (%u) occurred more thane once\n",
2590                           value);
2591                 correct = false;
2592                 return true;
2593             }
2594             tidFound[value] = true;
2595         }
2596         return true;
2597     }
2598 };
2599 
test_atomic_flag_generic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,bool useSVM)2600 int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
2601                              cl_command_queue queue, int num_elements,
2602                              bool useSVM)
2603 {
2604     int error = 0;
2605     CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
2606                                                           useSVM);
2607     EXECUTE_TEST(error,
2608                  test_flag.Execute(deviceID, context, queue, num_elements));
2609     return error;
2610 }
2611 
test_atomic_flag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2612 int test_atomic_flag(cl_device_id deviceID, cl_context context,
2613                      cl_command_queue queue, int num_elements)
2614 {
2615     return test_atomic_flag_generic(deviceID, context, queue, num_elements,
2616                                     false);
2617 }
2618 
test_svm_atomic_flag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)2619 int test_svm_atomic_flag(cl_device_id deviceID, cl_context context,
2620                          cl_command_queue queue, int num_elements)
2621 {
2622     return test_atomic_flag_generic(deviceID, context, queue, num_elements,
2623                                     true);
2624 }
2625 
// Test of atomic_work_item_fence.
//
// Verifies synchronizes-with relationships between pairs of release-side
// and acquire-side operations, where each side is either a standalone
// fence plus a relaxed atomic, or an atomic carrying the ordering itself.
// Sub-cases (which side is a fence and with which memory order) are
// described by TestDefinition and iterated by ExecuteForEachParameterSet.
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFence
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    // One release/acquire pairing to test: opNIsFence selects standalone
    // fence (true) vs ordering attached to the atomic op (false).
    struct TestDefinition
    {
        bool op1IsFence;
        TExplicitMemoryOrderType op1MemOrder;
        bool op2IsFence;
        TExplicitMemoryOrderType op2MemOrder;
    };

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::DeclaredInProgram;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::CurrentGroupSize;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
    // Fence tests start all atomics at 0 and do not validate exchanged
    // "old" values (there is no single expected old value per slot).
    CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
NumResults(cl_uint threadCount,cl_device_id deviceID)2659     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
2660     {
2661         return threadCount;
2662     }
    // Number of non-atomic scratch variables each thread may write.
    // The seq_cst sub-test needs only one (the last value read from the
    // partner thread); otherwise the count is sized to fit the minimum
    // local memory the spec guarantees, or fixed at 256 for global memory.
    virtual cl_uint NumNonAtomicVariablesPerThread()
    {
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
        if (LocalMemory())
        {
            if (gIsEmbedded)
            {
                // Side effect: clamps the work-group size so the budget fits.
                if (CurrentGroupSize() > 512) CurrentGroupSize(512);
                return 2; // 1KB of local memory required by spec. Clamp group
                          // size to 512 and allow 2 variables per thread
            }
            else
                return 32 * 1024 / 8 / CurrentGroupSize()
                    - 1; // 32KB of local memory required by spec
        }
        return 256;
    }
    // Builds a readable test name such as
    // "release fence synchronizes-with acquire atomic, <base name>, _scope".
    virtual std::string SingleTestName()
    {
        std::string testName;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
            testName += "seq_cst fence, ";
        else
            // substr(sizeof("memory_order")) strips the "memory_order_"
            // prefix: sizeof counts the terminating NUL, which accounts
            // for the trailing underscore.
            testName +=
                std::string(get_memory_order_type_name(_subCase.op1MemOrder))
                    .substr(sizeof("memory_order"))
                + (_subCase.op1IsFence ? " fence" : " atomic")
                + " synchronizes-with "
                + std::string(get_memory_order_type_name(_subCase.op2MemOrder))
                      .substr(sizeof("memory_order"))
                + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
        testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
        // Append the scope with its "memory_" prefix stripped (same trick).
        testName += std::string(", ")
            + std::string(get_memory_scope_type_name(MemoryScope()))
                  .substr(sizeof("memory"));
        return testName;
    }
SVMDataBufferAllSVMConsistent()2700     virtual bool SVMDataBufferAllSVMConsistent()
2701     {
2702         // Although memory_scope_all_devices doesn't mention SVM it is just an
2703         // alias for memory_scope_all_svm_devices.  So both scopes interact with
2704         // SVM allocations, on devices that support those, just the same.
2705         return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
2706             || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
2707     }
ExecuteForEachParameterSet(cl_device_id deviceID,cl_context context,cl_command_queue queue)2708     virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
2709                                            cl_context context,
2710                                            cl_command_queue queue)
2711     {
2712         int error = 0;
2713         // execute 3 (maximum) sub cases for each memory order
2714         for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
2715         {
2716             EXECUTE_TEST(
2717                 error,
2718                 (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
2719                      ExecuteForEachParameterSet(deviceID, context, queue)));
2720         }
2721         return error;
2722     }
    // Picks the release/acquire pairing (_subCase) matching the current
    // memory order and _subCaseId, skips configurations where fences do not
    // apply, then delegates to the base-class single-test runner.
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (DeclaredInProgram() || UsedInFunction())
            return 0; // skip test - not applicable - no overloaded fence
                      // functions for different address spaces
        if (MemoryOrder() == MEMORY_ORDER_EMPTY
            || MemoryScope()
                == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since
                                       // opencl20-openclc-rev15
            return 0; // skip test - not applicable
        if ((UseSVM() || gHost) && LocalMemory())
            return 0; // skip test - not applicable for SVM and local memory
        // Sub-case tables: acquire-side, release-side and acq_rel pairings.
        struct TestDefinition acqTests[] = {
            // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
        };
        struct TestDefinition relTests[] = {
            { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
        };
        struct TestDefinition arTests[] = {
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
            { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
        };
        switch (MemoryOrder())
        {
            case MEMORY_ORDER_ACQUIRE:
                if (_subCaseId
                    >= sizeof(acqTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = acqTests[_subCaseId];
                break;
            case MEMORY_ORDER_RELEASE:
                if (_subCaseId
                    >= sizeof(relTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = relTests[_subCaseId];
                break;
            case MEMORY_ORDER_ACQ_REL:
                if (_subCaseId
                    >= sizeof(arTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = arTests[_subCaseId];
                break;
            case MEMORY_ORDER_SEQ_CST:
                if (_subCaseId != 0) // one special case only
                    return 0;
                break;
            default: return 0;
        }
        // Reference (non-atomic) values live in local memory iff the
        // atomics do.
        LocalRefValues(LocalMemory());
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
ProgramHeader(cl_uint maxNumDestItems)2782     virtual std::string ProgramHeader(cl_uint maxNumDestItems)
2783     {
2784         std::string header;
2785         if (gOldAPI)
2786         {
2787             if (MemoryScope() == MEMORY_SCOPE_EMPTY)
2788             {
2789                 header += "#define atomic_work_item_fence(x,y)                 "
2790                           "       mem_fence(x)\n";
2791             }
2792             else
2793             {
2794                 header += "#define atomic_work_item_fence(x,y,z)               "
2795                           "       mem_fence(x)\n";
2796             }
2797         }
2798         return header
2799             + CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
2800                 ProgramHeader(maxNumDestItems);
2801     }
    // Emits the kernel body for the fence test.  Two protocols are
    // generated, selected by MemoryOrder():
    //   - seq_cst: paired threads ping-pong counter values through their
    //     atomics with a seq_cst fence between store and load;
    //   - otherwise: a synchronizes-with scenario where the release side
    //     and acquire side are each either a standalone fence (plus relaxed
    //     atomic) or an atomic carrying the ordering, per _subCase.
    // On failure the kernel stores myValue-1 and stashes diagnostic values
    // in the non-atomic array.
    virtual std::string ProgramCore()
    {
        std::ostringstream naValues;
        naValues << NumNonAtomicVariablesPerThread();
        std::string program, fenceType, nonAtomic;
        if (LocalMemory())
        {
            // Partner = mirrored local id within the work-group.
            program = "  size_t myId = get_local_id(0), hisId = "
                      "get_local_size(0)-1-myId;\n";
            fenceType = "CLK_LOCAL_MEM_FENCE";
            nonAtomic = "localValues";
        }
        else
        {
            // Partner = mirrored global thread id.
            program = "  size_t myId = tid, hisId = threadCount-1-tid;\n";
            fenceType = "CLK_GLOBAL_MEM_FENCE";
            nonAtomic = "oldValues";
        }
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the
            // following actions:
            // - increments its own variable
            // - performs fence operation to propagate its value and to see
            // value from other thread
            // - reads value from other thread's variable
            // - repeats the above steps when both values are the same (and less
            // than 1000000)
            // - stores the last value read from other thread (in additional
            // variable) At the end of execution at least one thread should know
            // the last value from other thread
            program += std::string("") + "  " + DataType().RegularTypeName()
                + " myValue = 0, hisValue; \n"
                  "  do {\n"
                  "    myValue++;\n"
                  "    atomic_store_explicit(&destMemory[myId], myValue, "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  "    atomic_work_item_fence("
                + fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
                + "); \n"
                  "    hisValue = atomic_load_explicit(&destMemory[hisId], "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  "  } while(myValue == hisValue && myValue < 1000000);\n"
                  "  "
                + nonAtomic + "[myId] = hisValue; \n";
        }
        else
        {
            // Each thread modifies one of its non-atomic variables, increments
            // value of its atomic variable and reads values from another thread
            // in typical synchronizes-with scenario with:
            // - non-atomic variable (at index A) modification (value change
            // from 0 to A)
            // - release operation (additional fence or within atomic) + atomic
            // variable modification (value A)
            // - atomic variable read (value B) + acquire operation (additional
            // fence or within atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies dependency between atomic and non-atomic
            // value read from another thread The following condition must be
            // true: B == C
            program += std::string("") + "  " + DataType().RegularTypeName()
                + " myValue = 0, hisAtomicValue, hisValue; \n"
                  "  do {\n"
                  "    myValue++;\n"
                  "    "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue] = myValue;\n";
            // Release side: fence + relaxed store, or store with order.
            if (_subCase.op1IsFence)
                program += std::string("") + "    atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr()
                    + "); \n"
                      "    atomic_store_explicit(&destMemory[myId], myValue, "
                      "memory_order_relaxed"
                    + MemoryScopeStr() + ");\n";
            else
                program += std::string("")
                    + "    atomic_store_explicit(&destMemory[myId], myValue, "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr() + ");\n";
            // Acquire side: relaxed load + fence, or load with order.
            if (_subCase.op2IsFence)
                program += std::string("")
                    + "    hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                      "memory_order_relaxed"
                    + MemoryScopeStr()
                    + ");\n"
                      "    atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + "); \n";
            else
                program += std::string("")
                    + "    hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + ");\n";
            program += "    hisValue = " + nonAtomic + "[hisId*"
                + naValues.str() + "+hisAtomicValue]; \n";
            if (LocalMemory())
                program += "    hisId = (hisId+1)%get_local_size(0);\n";
            else
                program += "    hisId = (hisId+1)%threadCount;\n";
            program += "  } while(hisAtomicValue == hisValue && myValue < "
                + naValues.str()
                + "-1);\n"
                  "  if(hisAtomicValue != hisValue)\n"
                  "  { // fail\n"
                  "    atomic_store_explicit(&destMemory[myId], myValue-1,"
                  " memory_order_relaxed, memory_scope_work_group);\n";
            if (LocalMemory())
                program += "    hisId = "
                           "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
            else
                program += "    hisId = (hisId+threadCount-1)%threadCount;\n";
            // Stash diagnostics (partner id, atomic value, non-atomic value)
            // in the remaining scratch slots for post-mortem inspection.
            program += "    if(myValue+1 < " + naValues.str()
                + ")\n"
                  "      "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+1] = hisId;\n"
                  "    if(myValue+2 < "
                + naValues.str()
                + ")\n"
                  "      "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+2] = hisAtomicValue;\n"
                  "    if(myValue+3 < "
                + naValues.str()
                + ")\n"
                  "      "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+3] = hisValue;\n";
            if (gDebug)
            {
                program += "    printf(\"WI %d: atomic value (%d) at index %d "
                           "is different than non-atomic value (%d)\\n\", tid, "
                           "hisAtomicValue, hisId, hisValue);\n";
            }
            program += "  }\n";
        }
        return program;
    }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)2951     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
2952                               volatile HostAtomicType *destMemory,
2953                               HostDataType *oldValues)
2954     {
2955         size_t myId = tid, hisId = threadCount - 1 - tid;
2956         if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
2957         {
2958             HostDataType myValue = 0, hisValue;
2959             // CPU thread typically starts faster - wait for GPU thread
2960             myValue++;
2961             host_atomic_store<HostAtomicType, HostDataType>(
2962                 &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
2963             while (host_atomic_load<HostAtomicType, HostDataType>(
2964                        &destMemory[hisId], MEMORY_ORDER_SEQ_CST)
2965                    == 0)
2966                 ;
2967             do
2968             {
2969                 myValue++;
2970                 host_atomic_store<HostAtomicType, HostDataType>(
2971                     &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
2972                 host_atomic_thread_fence(MemoryOrder());
2973                 hisValue = host_atomic_load<HostAtomicType, HostDataType>(
2974                     &destMemory[hisId], MEMORY_ORDER_RELAXED);
2975             } while (myValue == hisValue && hisValue < 1000000);
2976             oldValues[tid] = hisValue;
2977         }
2978         else
2979         {
2980             HostDataType myValue = 0, hisAtomicValue, hisValue;
2981             do
2982             {
2983                 myValue++;
2984                 oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
2985                     myValue;
2986                 if (_subCase.op1IsFence)
2987                 {
2988                     host_atomic_thread_fence(_subCase.op1MemOrder);
2989                     host_atomic_store<HostAtomicType, HostDataType>(
2990                         &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
2991                 }
2992                 else
2993                     host_atomic_store<HostAtomicType, HostDataType>(
2994                         &destMemory[myId], myValue, _subCase.op1MemOrder);
2995                 if (_subCase.op2IsFence)
2996                 {
2997                     hisAtomicValue =
2998                         host_atomic_load<HostAtomicType, HostDataType>(
2999                             &destMemory[hisId], MEMORY_ORDER_RELAXED);
3000                     host_atomic_thread_fence(_subCase.op2MemOrder);
3001                 }
3002                 else
3003                     hisAtomicValue =
3004                         host_atomic_load<HostAtomicType, HostDataType>(
3005                             &destMemory[hisId], _subCase.op2MemOrder);
3006                 hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
3007                                      + hisAtomicValue];
3008                 hisId = (hisId + 1) % threadCount;
3009             } while (hisAtomicValue == hisValue
3010                      && myValue
3011                          < (HostDataType)NumNonAtomicVariablesPerThread() - 1);
3012             if (hisAtomicValue != hisValue)
3013             { // fail
3014                 host_atomic_store<HostAtomicType, HostDataType>(
3015                     &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
3016                 if (gDebug)
3017                 {
3018                     hisId = (hisId + threadCount - 1) % threadCount;
3019                     printf("WI %d: atomic value (%d) at index %d is different "
3020                            "than non-atomic value (%d)\n",
3021                            tid, hisAtomicValue, hisId, hisValue);
3022                 }
3023             }
3024         }
3025     }
GenerateRefs(cl_uint threadCount,HostDataType * startRefValues,MTdata d)3026     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
3027                               MTdata d)
3028     {
3029         for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread();
3030              i++)
3031             startRefValues[i] = 0;
3032         return true;
3033     }
    // Host-side verification of the fence test results.
    //
    // finalValues holds one atomic counter per work-item; refValues holds the
    // last value each work-item read from its partner's non-atomic variables.
    // Work-items appear to be checked in pairs: index i is matched with index
    // (workSize - 1 - i) inside the same workgroup — TODO confirm against the
    // kernel source, which is outside this view.
    //
    // Returns true on completion (even when verification fails); the actual
    // pass/fail outcome is reported through the `correct` out-parameter.
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        // With local memory the pairing is per-workgroup; otherwise all
        // threads form one group.
        cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
        for (cl_uint workOffset = 0; workOffset < threadCount;
             workOffset += workSize)
        {
            if (workOffset + workSize > threadCount)
                // last workgroup (host threads)
                workSize = threadCount - workOffset;
            for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
                 i++)
            {
                // This work-item's final atomic counter value.
                HostAtomicType myValue = finalValues[workOffset + i];
                if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
                {
                    // Last partner value this work-item observed.
                    HostDataType hisValue = refValues[workOffset + i];
                    if (myValue == hisValue)
                    {
                        // a draw - both threads should reach final value
                        // 1000000
                        if (myValue != 1000000)
                        {
                            log_error("ERROR: Invalid reference value #%u (%d "
                                      "instead of 1000000)\n",
                                      workOffset + i, myValue);
                            correct = false;
                            return true;
                        }
                    }
                    else
                    {
                        // slower thread (in total order of seq_cst operations)
                        // must know last value written by faster thread
                        HostAtomicType hisRealValue =
                            finalValues[workOffset + workSize - 1 - i];
                        HostDataType myValueReadByHim =
                            refValues[workOffset + workSize - 1 - i];

                        // who is the winner? - thread with lower private
                        // counter value
                        if (myValue == hisRealValue) // forbidden result - fence
                                                     // doesn't work
                        {
                            log_error("ERROR: Atomic counter values #%u and "
                                      "#%u are the same (%u)\n",
                                      workOffset + i,
                                      workOffset + workSize - 1 - i, myValue);
                            log_error(
                                "ERROR: Both threads have outdated values read "
                                "from another thread (%u and %u)\n",
                                hisValue, myValueReadByHim);
                            correct = false;
                            return true;
                        }
                        if (myValue > hisRealValue) // I'm slower
                        {
                            // Then my recorded read of the partner must match
                            // the partner's final counter value.
                            if (hisRealValue != hisValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + i, hisValue,
                                          hisRealValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + i,
                                    workOffset + workSize - 1 - i);
                                correct = false;
                                return true;
                            }
                        }
                        else // I'm faster
                        {
                            // Symmetric check: the slower partner must have
                            // observed my final counter value.
                            if (myValueReadByHim != myValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + workSize - 1 - i,
                                          myValueReadByHim, myValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + workSize - 1 - i,
                                    workOffset + i);
                                correct = false;
                                return true;
                            }
                        }
                    }
                }
                else
                {
                    // Non-seq_cst orders: each work-item is only required to
                    // have walked its private counter to the last variable
                    // index without seeing an inconsistent partner value.
                    if (myValue != NumNonAtomicVariablesPerThread() - 1)
                    {
                        log_error("ERROR: Invalid atomic value #%u (%d instead "
                                  "of %d)\n",
                                  workOffset + i, myValue,
                                  NumNonAtomicVariablesPerThread() - 1);
                        log_error("ERROR: Thread #%u observed invalid values "
                                  "in other thread's variables\n",
                                  workOffset + i);
                        correct = false;
                        return true;
                    }
                }
            }
        }
        return true;
    }
3146 
3147 private:
3148     size_t _subCaseId;
3149     struct TestDefinition _subCase;
3150 };
3151 
// Runs CBasicTestFence over every required atomic type, choosing the 32-bit
// or 64-bit host mirror for the pointer-sized types based on the device's
// reported size of atomic_size_t.
//
// Note: EXECUTE_TEST is a project macro taking the test expression itself;
// the local variable names therefore likely appear in its log output, so
// they should not be renamed casually — TODO confirm against common.h.
//
// Returns 0 on success, non-zero accumulated error otherwise.
int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements,
                              bool useSVM)
{
    int error = 0;
    // Fixed-width integer atomics run unconditionally.
    CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    // Pointer-sized atomics: pick the host representation matching the
    // device's atomic_size_t width (4 bytes -> 32-bit, otherwise 64-bit).
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}
3219 
test_atomic_fence(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)3220 int test_atomic_fence(cl_device_id deviceID, cl_context context,
3221                       cl_command_queue queue, int num_elements)
3222 {
3223     return test_atomic_fence_generic(deviceID, context, queue, num_elements,
3224                                      false);
3225 }
3226 
test_svm_atomic_fence(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)3227 int test_svm_atomic_fence(cl_device_id deviceID, cl_context context,
3228                           cl_command_queue queue, int num_elements)
3229 {
3230     return test_atomic_fence_generic(deviceID, context, queue, num_elements,
3231                                      true);
3232 }
3233