//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"

#include "common.h"
#include "host_atomics.h"

#include <sstream>
#include <vector>

template <typename HostAtomicType, typename HostDataType>
class CBasicTestStore
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
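        // Note: atomic_store supports only memory_order_relaxed,
        // memory_order_release and memory_order_seq_cst, so acquire-flavored
        // orders do not apply to this test and are skipped.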
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements,
                              bool useSVM)
{
    int error = 0;
    CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_store(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue, int num_elements)
{
    return test_atomic_store_generic(deviceID, context, queue, num_elements,
                                     false);
}

int test_svm_atomic_store(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_store_generic(deviceID, context, queue, num_elements,
                                     true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType> {
public:
    using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
    CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
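    // Note: atomic_init performs a non-atomic initialization and takes no
    // memory order or scope arguments, so there is no _explicit variant to
    // exercise here.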
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual std::string ProgramCore()
    {
        return "  atomic_init(&destMemory[tid], tid);\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_init(&destMemory[tid], (HostDataType)tid);
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements,
                             bool useSVM)
{
    int error = 0;
    CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_init(cl_device_id deviceID, cl_context context,
                     cl_command_queue queue, int num_elements)
{
    return test_atomic_init_generic(deviceID, context, queue, num_elements,
                                    false);
}

int test_svm_atomic_init(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_init_generic(deviceID, context, queue, num_elements,
                                    true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestLoad
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
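        // Note: atomic_load supports only memory_order_relaxed,
        // memory_order_acquire and memory_order_seq_cst, so release-flavored
        // orders do not apply to this test and are skipped.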
        if (MemoryOrder() == MEMORY_ORDER_RELEASE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        // In case this test is run with MEMORY_ORDER_ACQUIRE, the store must
        // use MEMORY_ORDER_RELEASE so the load can synchronize with it
        std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
        std::string memoryOrderScopeStore =
            (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
            ? (", memory_order_release" + MemoryScopeStr())
            : memoryOrderScopeLoad;
        std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScopeStore
            + ");\n"
              "  oldValues[tid] = atomic_load"
            + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid,
                          MEMORY_ORDER_SEQ_CST);
        oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
            &destMemory[tid], MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (refValues[i] != (HostDataType)i)
            {
                log_error("Invalid value for thread %u\n", (cl_uint)i);
                correct = false;
                return true;
            }
        }
        return true;
    }
};

int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements,
                             bool useSVM)
{
    int error = 0;
    CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_load(cl_device_id deviceID, cl_context context,
                     cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements,
                                    false);
}

int test_svm_atomic_load(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_load_generic(deviceID, context, queue, num_elements,
                                    true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestExchange
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(123456);
    }
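    // Each thread first swaps its tid into destMemory[0], then keeps
    // re-exchanging whatever value it got back. Since every exchange only
    // swaps values, the tids 0..threadCount-1 plus the start value must end
    // up distributed across oldValues[] and the final atomic value, each
    // occurring exactly once; VerifyRefs checks exactly that.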
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_exchange" + postfix
            + "(&destMemory[0], tid" + memoryOrderScope
            + ");\n"
              "  for(int i = 0; i < "
            + IterationsStr()
            + "; i++)\n"
              "    oldValues[tid] = atomic_exchange"
            + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
            + ");\n";
    }

    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid,
                                              MemoryOrder());
        for (int i = 0; i < Iterations(); i++)
            oldValues[tid] = host_atomic_exchange(
                &destMemory[0], oldValues[tid], MemoryOrder());
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        OldValueCheck(
            Iterations() % 2
            == 0); // check is valid for an even number of iterations only
        correct = true;
        /* We are expecting the values 0 to size-1 and the initial value of
         * the atomic variable */
        /* These values must be distributed across the refValues array and the
         * atomic variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;

        for (i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if (i == threadCount)
                value = (cl_uint)finalValues[0]; // additional value from the
                                                 // atomic variable (last
                                                 // written)
            else
                value = (cl_uint)refValues[i];
            if (value == (cl_uint)StartValue())
            {
                // Special initial value
                if (startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred "
                              "more than once\n",
                              (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if (value >= threadCount)
            {
                log_error(
                    "ERROR: Reference value %u outside of valid range! (%u)\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context,
                                 cl_command_queue queue, int num_elements,
                                 bool useSVM)
{
    int error = 0;
    CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
        TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_exchange(cl_device_id deviceID, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
                                        false);
}

int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
                                        true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong
    : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
    using CBasicTestMemOrder2Scope<HostAtomicType,
                                   HostDataType>::MemoryOrderScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
    {
        StartValue(123456);
        OldValueCheck(false);
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder2() == MEMORY_ORDER_RELEASE
            || MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
            return 0; // not allowed as the 'failure' argument
        if ((MemoryOrder() == MEMORY_ORDER_RELAXED
             && MemoryOrder2() != MEMORY_ORDER_RELAXED)
            || (MemoryOrder() != MEMORY_ORDER_SEQ_CST
                && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
            return 0; // the failure argument shall be no stronger than the
                      // success argument

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder2())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrder2Scope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
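    // The kernel below first forces a failing compare: 'expected' is set to
    // tid, which can match neither the start value nor another thread's tid.
    // Each thread then loops, retrying the CAS until it publishes
    // oldValues[tid] and takes ownership of the previous value, so the values
    // circulate as a permutation. For the strong variant, a failure that
    // leaves 'expected' unchanged would be a spurious failure (forbidden by
    // the spec) and is flagged with an invalid marker value.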
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  ") + DataType().RegularTypeName()
            + " expected, previous;\n"
              "  int successCount = 0;\n"
              "  oldValues[tid] = tid;\n"
              "  expected = tid; // force failure at the beginning\n"
              "  if(atomic_compare_exchange_strong"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + ") || expected == tid)\n"
              "    oldValues[tid] = threadCount+1; // mark unexpected success "
              "with an invalid value\n"
              "  else\n"
              "  {\n"
              "    for(int i = 0; i < "
            + IterationsStr()
            + " || successCount == 0; i++)\n"
              "    {\n"
              "      previous = expected;\n"
              "      if(atomic_compare_exchange_strong"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + "))\n"
              "      {\n"
              "        oldValues[tid] = expected;\n"
              "        successCount++;\n"
              "      }\n"
              "      else\n"
              "      {\n"
              "        if(previous == expected) // spurious failure - "
              "shouldn't occur for 'strong'\n"
              "        {\n"
              "          oldValues[tid] = threadCount; // mark failure with "
              "an invalid value\n"
              "          break;\n"
              "        }\n"
              "      }\n"
              "    }\n"
              "  }\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        HostDataType expected = (HostDataType)StartValue(), previous;
        oldValues[tid] = (HostDataType)tid;
        for (int i = 0; i < Iterations(); i++)
        {
            previous = expected;
            if (host_atomic_compare_exchange(&destMemory[0], &expected,
                                             oldValues[tid], MemoryOrder(),
                                             MemoryOrder2()))
                oldValues[tid] = expected;
            else
            {
                if (previous == expected) // shouldn't occur for 'strong'
                {
                    oldValues[tid] = threadCount; // mark failure with an
                                                  // invalid value
                }
            }
        }
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        /* We are expecting the values 0 to size-1 and the initial value of
         * the atomic variable */
        /* These values must be distributed across the refValues array and the
         * atomic variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;

        for (i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if (i == threadCount)
                value = (cl_uint)finalValues[0]; // additional value from the
                                                 // atomic variable (last
                                                 // written)
            else
                value = (cl_uint)refValues[i];
            if (value == (cl_uint)StartValue())
            {
                // Special initial value
                if (startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred "
                              "more than once\n",
                              (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if (value >= threadCount)
            {
                if (value == threadCount)
                    log_error("ERROR: Spurious failure detected for "
                              "atomic_compare_exchange_strong\n");
                log_error(
                    "ERROR: Reference value %u outside of valid range! (%u)\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID,
                                                cl_context context,
                                                cl_command_queue queue,
                                                int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                                useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_compare_exchange_strong(cl_device_id deviceID,
                                        cl_context context,
                                        cl_command_queue queue,
                                        int num_elements)
{
    return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
                                                       num_elements, false);
}

int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID,
                                            cl_context context,
                                            cl_command_queue queue,
                                            int num_elements)
{
    return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
                                                       num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak
    : public CBasicTestCompareStrong<HostAtomicType, HostDataType> {
public:
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestCompareStrong<HostAtomicType,
                                  HostDataType>::MemoryOrderScope;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
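    // Same CAS loop as the strong variant, but atomic_compare_exchange_weak
    // is permitted to fail spuriously (return false while leaving the value
    // unchanged), so the spurious-failure check is omitted from the kernel.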
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  ") + DataType().RegularTypeName()
            + " expected, previous;\n"
              "  int successCount = 0;\n"
              "  oldValues[tid] = tid;\n"
              "  expected = tid; // force failure at the beginning\n"
              "  if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + ") || expected == tid)\n"
              "    oldValues[tid] = threadCount+1; // mark unexpected success "
              "with an invalid value\n"
              "  else\n"
              "  {\n"
              "    for(int i = 0; i < "
            + IterationsStr()
            + " || successCount == 0; i++)\n"
              "    {\n"
              "      previous = expected;\n"
              "      if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + "))\n"
              "      {\n"
              "        oldValues[tid] = expected;\n"
              "        successCount++;\n"
              "      }\n"
              "    }\n"
              "  }\n";
    }
};

int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID,
                                              cl_context context,
                                              cl_command_queue queue,
                                              int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context,
                                      cl_command_queue queue, int num_elements)
{
    return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
                                                     num_elements, false);
}

int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID,
                                          cl_context context,
                                          cl_command_queue queue,
                                          int num_elements)
{
    return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
                                                     num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
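    // Each thread performs four fetch-adds: three add (tid + 3), and the
    // fourth adds (tid + 3) shifted into the most significant byte, so both
    // the low and high ends of the atomic type's full width are exercised.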
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_add" + postfix
            + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
            + ")tid + 3" + memoryOrderScope + ");\n" + "  atomic_fetch_add"
            + postfix + "(&destMemory[0], ("
            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
            + ");\n"
              "  atomic_fetch_add"
            + postfix + "(&destMemory[0], ("
            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
            + ");\n"
              "  atomic_fetch_add"
            + postfix + "(&destMemory[0], (("
            + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
            + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
            + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_add(
            &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                              MemoryOrder());
        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                              MemoryOrder());
        host_atomic_fetch_add(&destMemory[0],
                              ((HostDataType)tid + 3)
                                  << (sizeof(HostDataType) - 1) * 8,
                              MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
            expected += ((HostDataType)i + 3) * 3
                + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        return true;
    }
};

int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_add(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
                                         false);
}

int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
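    // Mirrors the fetch-add test: each thread subtracts (tid + 3) plus the
    // same quantity shifted into the most significant byte in a single
    // fetch-sub, so borrows propagate across the full width of the type.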
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_fetch_sub" + postfix
            + "(&destMemory[0], tid + 3 + ((("
            + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
            + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope
            + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_sub(
            &destMemory[0],
            (HostDataType)tid + 3
                + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
            MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
            expected -= (HostDataType)i + 3
                + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        return true;
    }
};

int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue, int num_elements,
                                  bool useSVM)
{
    int error = 0;
    CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
                                         false);
}

int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
    }
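    // One bit per thread: thread tid sets bit (tid % numBits) of result word
    // (tid / numBits), so the number of result values is threadCount rounded
    // up to whole words, and every fully-covered word must end up all-ones.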
NumResults(cl_uint threadCount,cl_device_id deviceID)1338 virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1339 {
1340 cl_uint numBits = DataType().Size(deviceID) * 8;
1341
1342 return (threadCount + numBits - 1) / numBits;
1343 }
ProgramCore()1344 virtual std::string ProgramCore()
1345 {
1346 std::string memoryOrderScope = MemoryOrderScopeStr();
1347 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1348 return std::string(" size_t numBits = sizeof(")
1349 + DataType().RegularTypeName()
1350 + ") * 8;\n"
1351 " int whichResult = tid / numBits;\n"
1352 " int bitIndex = tid - (whichResult * numBits);\n"
1353 "\n"
1354 " oldValues[tid] = atomic_fetch_or"
1355 + postfix + "(&destMemory[whichResult], (("
1356 + DataType().RegularTypeName() + ")1 << bitIndex) "
1357 + memoryOrderScope + ");\n";
1358 }
HostFunction(cl_uint tid,cl_uint threadCount,volatile HostAtomicType * destMemory,HostDataType * oldValues)1359 virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1360 volatile HostAtomicType *destMemory,
1361 HostDataType *oldValues)
1362 {
1363 size_t numBits = sizeof(HostDataType) * 8;
1364 size_t whichResult = tid / numBits;
1365 size_t bitIndex = tid - (whichResult * numBits);
1366
1367 oldValues[tid] =
1368 host_atomic_fetch_or(&destMemory[whichResult],
1369 ((HostDataType)1 << bitIndex), MemoryOrder());
1370 }
ExpectedValue(HostDataType & expected,cl_uint threadCount,HostDataType * startRefValues,cl_uint whichDestValue)1371 virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
1372 HostDataType *startRefValues,
1373 cl_uint whichDestValue)
1374 {
1375 cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
1376 / (sizeof(HostDataType) * 8);
1377 if (whichDestValue < numValues - 1)
1378 {
1379 expected = ~(HostDataType)0;
1380 return true;
1381 }
1382 // Last item doesn't get or'ed on every bit, so we have to mask away
1383 cl_uint numBits =
1384 threadCount - whichDestValue * (sizeof(HostDataType) * 8);
1385 expected = StartValue();
1386 for (cl_uint i = 0; i < numBits; i++)
1387 expected |= ((HostDataType)1 << i);
1388 return true;
1389 }
1390 };
1391
1392 int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context,
1393 cl_command_queue queue, int num_elements,
1394 bool useSVM)
1395 {
1396 int error = 0;
1397 CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1398 useSVM);
1399 EXECUTE_TEST(error,
1400 test_int.Execute(deviceID, context, queue, num_elements));
1401 CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1402 useSVM);
1403 EXECUTE_TEST(error,
1404 test_uint.Execute(deviceID, context, queue, num_elements));
1405 CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1406 useSVM);
1407 EXECUTE_TEST(error,
1408 test_long.Execute(deviceID, context, queue, num_elements));
1409 CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1410 TYPE_ATOMIC_ULONG, useSVM);
1411 EXECUTE_TEST(error,
1412 test_ulong.Execute(deviceID, context, queue, num_elements));
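    // Pointer-sized types are dispatched on the device's size_t width: 32-bit
    // devices use the *_T32 host mirror types, 64-bit devices the *_T64 ones.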
1413 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1414 {
1415 CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1416 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1417 EXECUTE_TEST(
1418 error,
1419 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1420 CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1421 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1422 EXECUTE_TEST(
1423 error,
1424 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1425 CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1426 TYPE_ATOMIC_SIZE_T, useSVM);
1427 EXECUTE_TEST(
1428 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1429 CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1430 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1431 EXECUTE_TEST(
1432 error,
1433 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1434 }
1435 else
1436 {
1437 CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1438 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1439 EXECUTE_TEST(
1440 error,
1441 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1442 CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1443 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1444 EXECUTE_TEST(
1445 error,
1446 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1447 CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1448 TYPE_ATOMIC_SIZE_T, useSVM);
1449 EXECUTE_TEST(
1450 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1451 CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1452 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1453 EXECUTE_TEST(
1454 error,
1455 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1456 }
1457 return error;
1458 }
1459
1460 int test_atomic_fetch_or(cl_device_id deviceID, cl_context context,
1461 cl_command_queue queue, int num_elements)
1462 {
1463 return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
1464 false);
1465 }
1466
1467 int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context,
1468 cl_command_queue queue, int num_elements)
1469 {
1470 return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
1471 true);
1472 }
1473
1474 template <typename HostAtomicType, typename HostDataType>
1475 class CBasicTestFetchXor
1476 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1477 public:
1478 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1479 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1480 using CBasicTestMemOrderScope<HostAtomicType,
1481 HostDataType>::MemoryOrderScopeStr;
1482 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1483     CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM)
1484 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1485 useSVM)
1486 {
1487 StartValue((HostDataType)0x2f08ab418ba0541LL);
1488 }
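    // All threads XOR single bits of one shared atomic value; bitIndex spreads
    // the thread ids over bit positions 0..numBits-1, and ExpectedValue()
    // replays every XOR on the host to compute the final value.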
1489     virtual std::string ProgramCore()
1490 {
1491 std::string memoryOrderScope = MemoryOrderScopeStr();
1492 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1493 return std::string(" int numBits = sizeof(")
1494 + DataType().RegularTypeName()
1495 + ") * 8;\n"
1496 " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
1497 "\n"
1498 " oldValues[tid] = atomic_fetch_xor"
1499 + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName()
1500 + ")1 << bitIndex) " + memoryOrderScope + ");\n";
1501 }
1502     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1503 volatile HostAtomicType *destMemory,
1504 HostDataType *oldValues)
1505 {
1506 int numBits = sizeof(HostDataType) * 8;
1507 int bitIndex = (numBits - 1) * (tid + 1) / threadCount;
1508
1509 oldValues[tid] = host_atomic_fetch_xor(
1510 &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
1511 }
1512     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
1513 HostDataType *startRefValues,
1514 cl_uint whichDestValue)
1515 {
1516 int numBits = sizeof(HostDataType) * 8;
1517 expected = StartValue();
1518 for (cl_uint i = 0; i < threadCount; i++)
1519 {
1520 int bitIndex = (numBits - 1) * (i + 1) / threadCount;
1521 expected ^= ((HostDataType)1 << bitIndex);
1522 }
1523 return true;
1524 }
1525 };
1526
1527 int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context,
1528 cl_command_queue queue, int num_elements,
1529 bool useSVM)
1530 {
1531 int error = 0;
1532 CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1533 useSVM);
1534 EXECUTE_TEST(error,
1535 test_int.Execute(deviceID, context, queue, num_elements));
1536 CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1537 useSVM);
1538 EXECUTE_TEST(error,
1539 test_uint.Execute(deviceID, context, queue, num_elements));
1540 CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1541 useSVM);
1542 EXECUTE_TEST(error,
1543 test_long.Execute(deviceID, context, queue, num_elements));
1544 CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1545 TYPE_ATOMIC_ULONG, useSVM);
1546 EXECUTE_TEST(error,
1547 test_ulong.Execute(deviceID, context, queue, num_elements));
1548 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1549 {
1550 CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1551 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1552 EXECUTE_TEST(
1553 error,
1554 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1555 CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1556 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1557 EXECUTE_TEST(
1558 error,
1559 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1560 CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1561 TYPE_ATOMIC_SIZE_T, useSVM);
1562 EXECUTE_TEST(
1563 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1564 CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1565 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1566 EXECUTE_TEST(
1567 error,
1568 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1569 }
1570 else
1571 {
1572 CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1573 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1574 EXECUTE_TEST(
1575 error,
1576 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1577 CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1578 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1579 EXECUTE_TEST(
1580 error,
1581 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1582 CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1583 TYPE_ATOMIC_SIZE_T, useSVM);
1584 EXECUTE_TEST(
1585 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1586 CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1587 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1588 EXECUTE_TEST(
1589 error,
1590 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1591 }
1592 return error;
1593 }
1594
1595 int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
1596 cl_command_queue queue, int num_elements)
1597 {
1598 return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
1599 false);
1600 }
1601
1602 int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
1603 cl_command_queue queue, int num_elements)
1604 {
1605 return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
1606 true);
1607 }
1608
1609 template <typename HostAtomicType, typename HostDataType>
1610 class CBasicTestFetchAnd
1611 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1612 public:
1613 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1614 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1615 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1616 using CBasicTestMemOrderScope<HostAtomicType,
1617 HostDataType>::MemoryOrderScopeStr;
1618     CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM)
1619 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1620 useSVM)
1621 {
1622 StartValue(~(HostDataType)0);
1623 }
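    // Mirror image of the fetch_or test: starting from all ones, each thread
    // clears exactly one bit, so fully covered values end up 0.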
1624     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1625 {
1626 cl_uint numBits = DataType().Size(deviceID) * 8;
1627
1628 return (threadCount + numBits - 1) / numBits;
1629 }
1630     virtual std::string ProgramCore()
1631 {
1632 std::string memoryOrderScope = MemoryOrderScopeStr();
1633 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1634 return std::string(" size_t numBits = sizeof(")
1635 + DataType().RegularTypeName()
1636 + ") * 8;\n"
1637 " int whichResult = tid / numBits;\n"
1638 " int bitIndex = tid - (whichResult * numBits);\n"
1639 "\n"
1640 " oldValues[tid] = atomic_fetch_and"
1641 + postfix + "(&destMemory[whichResult], ~(("
1642 + DataType().RegularTypeName() + ")1 << bitIndex) "
1643 + memoryOrderScope + ");\n";
1644 }
1645     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1646 volatile HostAtomicType *destMemory,
1647 HostDataType *oldValues)
1648 {
1649 size_t numBits = sizeof(HostDataType) * 8;
1650 size_t whichResult = tid / numBits;
1651 size_t bitIndex = tid - (whichResult * numBits);
1652
1653 oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult],
1654 ~((HostDataType)1 << bitIndex),
1655 MemoryOrder());
1656 }
1657     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
1658 HostDataType *startRefValues,
1659 cl_uint whichDestValue)
1660 {
1661 cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
1662 / (sizeof(HostDataType) * 8);
1663 if (whichDestValue < numValues - 1)
1664 {
1665 expected = 0;
1666 return true;
1667 }
1668 // Last item doesn't get and'ed on every bit, so we have to mask away
1669 size_t numBits =
1670 threadCount - whichDestValue * (sizeof(HostDataType) * 8);
1671 expected = StartValue();
1672 for (size_t i = 0; i < numBits; i++)
1673 expected &= ~((HostDataType)1 << i);
1674 return true;
1675 }
1676 };
1677
1678 int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context,
1679 cl_command_queue queue, int num_elements,
1680 bool useSVM)
1681 {
1682 int error = 0;
1683 CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1684 useSVM);
1685 EXECUTE_TEST(error,
1686 test_int.Execute(deviceID, context, queue, num_elements));
1687 CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
1688 useSVM);
1689 EXECUTE_TEST(error,
1690 test_uint.Execute(deviceID, context, queue, num_elements));
1691 CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
1692 useSVM);
1693 EXECUTE_TEST(error,
1694 test_long.Execute(deviceID, context, queue, num_elements));
1695 CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1696 TYPE_ATOMIC_ULONG, useSVM);
1697 EXECUTE_TEST(error,
1698 test_ulong.Execute(deviceID, context, queue, num_elements));
1699 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1700 {
1701 CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1702 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1703 EXECUTE_TEST(
1704 error,
1705 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1706 CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1707 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1708 EXECUTE_TEST(
1709 error,
1710 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1711 CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1712 TYPE_ATOMIC_SIZE_T, useSVM);
1713 EXECUTE_TEST(
1714 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1715 CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1716 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1717 EXECUTE_TEST(
1718 error,
1719 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1720 }
1721 else
1722 {
1723 CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1724 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1725 EXECUTE_TEST(
1726 error,
1727 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1728 CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1729 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1730 EXECUTE_TEST(
1731 error,
1732 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1733 CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1734 TYPE_ATOMIC_SIZE_T, useSVM);
1735 EXECUTE_TEST(
1736 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1737 CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1738 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1739 EXECUTE_TEST(
1740 error,
1741 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1742 }
1743 return error;
1744 }
1745
1746 int test_atomic_fetch_and(cl_device_id deviceID, cl_context context,
1747 cl_command_queue queue, int num_elements)
1748 {
1749 return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
1750 false);
1751 }
1752
1753 int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context,
1754 cl_command_queue queue, int num_elements)
1755 {
1756 return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
1757 true);
1758 }
1759
1760 template <typename HostAtomicType, typename HostDataType>
1761 class CBasicTestFetchOrAnd
1762 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1763 public:
1764 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1765 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1766 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1767 using CBasicTestMemOrderScope<HostAtomicType,
1768 HostDataType>::MemoryOrderScopeStr;
1769 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1770 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
1771     CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM)
1772 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1773 useSVM)
1774 {
1775 StartValue(0);
1776 }
1777     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1778 {
1779 return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
1780 }
1781     // each thread modifies (with OR and AND operations) and verifies
1782     // only one bit of the atomic variable;
1783     // other bits are modified by other threads, but that must not affect
1784     // the current thread's operation
1785     virtual std::string ProgramCore()
1786 {
1787 std::string memoryOrderScope = MemoryOrderScopeStr();
1788 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1789 return std::string(" int bits = sizeof(")
1790 + DataType().RegularTypeName() + ")*8;\n"
1791 + " size_t valueInd = tid/bits;\n"
1792 " "
1793 + DataType().RegularTypeName() + " value, bitMask = ("
1794 + DataType().RegularTypeName()
1795 + ")1 << tid%bits;\n"
1796 " oldValues[tid] = 0;\n"
1797 " for(int i = 0; i < "
1798 + IterationsStr()
1799 + "; i++)\n"
1800 " {\n"
1801 " value = atomic_fetch_or"
1802 + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1803 + ");\n"
1804 " if(value & bitMask) // bit should be set to 0\n"
1805 " oldValues[tid]++;\n"
1806 " value = atomic_fetch_and"
1807 + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope
1808 + ");\n"
1809 " if(!(value & bitMask)) // bit should be set to 1\n"
1810 " oldValues[tid]++;\n"
1811 " }\n";
1812 }
1813     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1814 volatile HostAtomicType *destMemory,
1815 HostDataType *oldValues)
1816 {
1817 int bits = sizeof(HostDataType) * 8;
1818 size_t valueInd = tid / bits;
1819 HostDataType value, bitMask = (HostDataType)1 << tid % bits;
1820 oldValues[tid] = 0;
1821 for (int i = 0; i < Iterations(); i++)
1822 {
1823 value = host_atomic_fetch_or(destMemory + valueInd, bitMask,
1824 MemoryOrder());
1825 if (value & bitMask) // bit should be set to 0
1826 oldValues[tid]++;
1827 value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask,
1828 MemoryOrder());
1829 if (!(value & bitMask)) // bit should be set to 1
1830 oldValues[tid]++;
1831 }
1832 }
1833     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
1834 HostDataType *startRefValues,
1835 cl_uint whichDestValue)
1836 {
1837 expected = 0;
1838 return true;
1839 }
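    // oldValues[tid] counts the violations a thread observed on its own bit,
    // so VerifyRefs() expects every entry to be 0.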
1840     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
1841 HostDataType *refValues,
1842 HostAtomicType *finalValues)
1843 {
1844 correct = true;
1845 for (cl_uint i = 0; i < threadCount; i++)
1846 {
1847 if (refValues[i] > 0)
1848 {
1849                 log_error("Thread %u found %u mismatch(es)\n", i,
1850                           (cl_uint)refValues[i]);
1851 correct = false;
1852 }
1853 }
1854 return true;
1855 }
1856 };
1857
1858 int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context,
1859 cl_command_queue queue, int num_elements,
1860 bool useSVM)
1861 {
1862 int error = 0;
1863 CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
1864 useSVM);
1865 EXECUTE_TEST(error,
1866 test_int.Execute(deviceID, context, queue, num_elements));
1867 CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
1868 TYPE_ATOMIC_UINT, useSVM);
1869 EXECUTE_TEST(error,
1870 test_uint.Execute(deviceID, context, queue, num_elements));
1871 CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(
1872 TYPE_ATOMIC_LONG, useSVM);
1873 EXECUTE_TEST(error,
1874 test_long.Execute(deviceID, context, queue, num_elements));
1875 CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
1876 TYPE_ATOMIC_ULONG, useSVM);
1877 EXECUTE_TEST(error,
1878 test_ulong.Execute(deviceID, context, queue, num_elements));
1879 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
1880 {
1881 CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
1882 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1883 EXECUTE_TEST(
1884 error,
1885 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1886 CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
1887 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1888 EXECUTE_TEST(
1889 error,
1890 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1891 CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
1892 TYPE_ATOMIC_SIZE_T, useSVM);
1893 EXECUTE_TEST(
1894 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1895 CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
1896 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1897 EXECUTE_TEST(
1898 error,
1899 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1900 }
1901 else
1902 {
1903 CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
1904 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
1905 EXECUTE_TEST(
1906 error,
1907 test_intptr_t.Execute(deviceID, context, queue, num_elements));
1908 CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
1909 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
1910 EXECUTE_TEST(
1911 error,
1912 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
1913 CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
1914 TYPE_ATOMIC_SIZE_T, useSVM);
1915 EXECUTE_TEST(
1916 error, test_size_t.Execute(deviceID, context, queue, num_elements));
1917 CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
1918 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
1919 EXECUTE_TEST(
1920 error,
1921 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
1922 }
1923 return error;
1924 }
1925
1926 int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
1927 cl_command_queue queue, int num_elements)
1928 {
1929 return test_atomic_fetch_orand_generic(deviceID, context, queue,
1930 num_elements, false);
1931 }
1932
1933 int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
1934 cl_command_queue queue, int num_elements)
1935 {
1936 return test_atomic_fetch_orand_generic(deviceID, context, queue,
1937 num_elements, true);
1938 }
1939
1940 template <typename HostAtomicType, typename HostDataType>
1941 class CBasicTestFetchXor2
1942 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
1943 public:
1944 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
1945 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
1946 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
1947 using CBasicTestMemOrderScope<HostAtomicType,
1948 HostDataType>::MemoryOrderScopeStr;
1949 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
1950 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
1951     CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM)
1952 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
1953 useSVM)
1954 {
1955 StartValue(0);
1956 }
1957     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
1958 {
1959 return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
1960 }
1961     // each thread modifies (with XOR operations) and verifies
1962     // only one bit of the atomic variable;
1963     // other bits are modified by other threads, but that must not affect
1964     // the current thread's operation
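    // Each iteration toggles the bit twice, so the value always returns to
    // StartValue() == 0, which is why ExpectedValue() reports 0.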
1965     virtual std::string ProgramCore()
1966 {
1967 std::string memoryOrderScope = MemoryOrderScopeStr();
1968 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
1969 return std::string(" int bits = sizeof(")
1970 + DataType().RegularTypeName() + ")*8;\n"
1971 + " size_t valueInd = tid/bits;\n"
1972 " "
1973 + DataType().RegularTypeName() + " value, bitMask = ("
1974 + DataType().RegularTypeName()
1975 + ")1 << tid%bits;\n"
1976 " oldValues[tid] = 0;\n"
1977 " for(int i = 0; i < "
1978 + IterationsStr()
1979 + "; i++)\n"
1980 " {\n"
1981 " value = atomic_fetch_xor"
1982 + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1983 + ");\n"
1984 " if(value & bitMask) // bit should be set to 0\n"
1985 " oldValues[tid]++;\n"
1986 " value = atomic_fetch_xor"
1987 + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
1988 + ");\n"
1989 " if(!(value & bitMask)) // bit should be set to 1\n"
1990 " oldValues[tid]++;\n"
1991 " }\n";
1992 }
1993     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
1994 volatile HostAtomicType *destMemory,
1995 HostDataType *oldValues)
1996 {
1997 int bits = sizeof(HostDataType) * 8;
1998 size_t valueInd = tid / bits;
1999 HostDataType value, bitMask = (HostDataType)1 << tid % bits;
2000 oldValues[tid] = 0;
2001 for (int i = 0; i < Iterations(); i++)
2002 {
2003 value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
2004 MemoryOrder());
2005 if (value & bitMask) // bit should be set to 0
2006 oldValues[tid]++;
2007 value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
2008 MemoryOrder());
2009 if (!(value & bitMask)) // bit should be set to 1
2010 oldValues[tid]++;
2011 }
2012 }
2013     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2014 HostDataType *startRefValues,
2015 cl_uint whichDestValue)
2016 {
2017 expected = 0;
2018 return true;
2019 }
2020     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
2021 HostDataType *refValues,
2022 HostAtomicType *finalValues)
2023 {
2024 correct = true;
2025 for (cl_uint i = 0; i < threadCount; i++)
2026 {
2027 if (refValues[i] > 0)
2028 {
2029                 log_error("Thread %u found %u mismatch(es)\n", i,
2030                           (cl_uint)refValues[i]);
2031 correct = false;
2032 }
2033 }
2034 return true;
2035 }
2036 };
2037
2038 int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context,
2039 cl_command_queue queue, int num_elements,
2040 bool useSVM)
2041 {
2042 int error = 0;
2043 CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
2044 useSVM);
2045 EXECUTE_TEST(error,
2046 test_int.Execute(deviceID, context, queue, num_elements));
2047 CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
2048 useSVM);
2049 EXECUTE_TEST(error,
2050 test_uint.Execute(deviceID, context, queue, num_elements));
2051 CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
2052 useSVM);
2053 EXECUTE_TEST(error,
2054 test_long.Execute(deviceID, context, queue, num_elements));
2055 CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
2056 TYPE_ATOMIC_ULONG, useSVM);
2057 EXECUTE_TEST(error,
2058 test_ulong.Execute(deviceID, context, queue, num_elements));
2059 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
2060 {
2061 CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
2062 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2063 EXECUTE_TEST(
2064 error,
2065 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2066 CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
2067 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2068 EXECUTE_TEST(
2069 error,
2070 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2071 CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
2072 TYPE_ATOMIC_SIZE_T, useSVM);
2073 EXECUTE_TEST(
2074 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2075 CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
2076 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2077 EXECUTE_TEST(
2078 error,
2079 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2080 }
2081 else
2082 {
2083 CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
2084 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2085 EXECUTE_TEST(
2086 error,
2087 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2088 CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
2089 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2090 EXECUTE_TEST(
2091 error,
2092 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2093 CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
2094 TYPE_ATOMIC_SIZE_T, useSVM);
2095 EXECUTE_TEST(
2096 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2097 CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
2098 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2099 EXECUTE_TEST(
2100 error,
2101 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2102 }
2103 return error;
2104 }
2105
2106 int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
2107 cl_command_queue queue, int num_elements)
2108 {
2109 return test_atomic_fetch_xor2_generic(deviceID, context, queue,
2110 num_elements, false);
2111 }
2112
2113 int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
2114 cl_command_queue queue, int num_elements)
2115 {
2116 return test_atomic_fetch_xor2_generic(deviceID, context, queue,
2117 num_elements, true);
2118 }
2119
2120 template <typename HostAtomicType, typename HostDataType>
2121 class CBasicTestFetchMin
2122 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
2123 public:
2124 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
2125 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
2126 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
2127 using CBasicTestMemOrderScope<HostAtomicType,
2128 HostDataType>::MemoryOrderScopeStr;
2129     CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
2130 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
2131 useSVM)
2132 {
2133 StartValue(DataType().MaxValue());
2134 }
2135     virtual std::string ProgramCore()
2136 {
2137 std::string memoryOrderScope = MemoryOrderScopeStr();
2138 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
2139 return " oldValues[tid] = atomic_fetch_min" + postfix
2140 + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
2141 }
2142     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
2143 volatile HostAtomicType *destMemory,
2144 HostDataType *oldValues)
2145 {
2146 oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid],
2147 MemoryOrder());
2148 }
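    // Reference values are random; for 64-bit types a second 32-bit draw is
    // shifted in so the generated values span roughly 48 bits instead of 32.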
2149     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
2150 MTdata d)
2151 {
2152 for (cl_uint i = 0; i < threadCount; i++)
2153 {
2154 startRefValues[i] = genrand_int32(d);
2155 if (sizeof(HostDataType) >= 8)
2156 startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
2157 }
2158 return true;
2159 }
2160     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2161 HostDataType *startRefValues,
2162 cl_uint whichDestValue)
2163 {
2164 expected = StartValue();
2165 for (cl_uint i = 0; i < threadCount; i++)
2166 {
2167 if (startRefValues[i] < expected) expected = startRefValues[i];
2168 }
2169 return true;
2170 }
2171 };
2172
2173 int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context,
2174 cl_command_queue queue, int num_elements,
2175 bool useSVM)
2176 {
2177 int error = 0;
2178 CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
2179 useSVM);
2180 EXECUTE_TEST(error,
2181 test_int.Execute(deviceID, context, queue, num_elements));
2182 CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
2183 useSVM);
2184 EXECUTE_TEST(error,
2185 test_uint.Execute(deviceID, context, queue, num_elements));
2186 CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
2187 useSVM);
2188 EXECUTE_TEST(error,
2189 test_long.Execute(deviceID, context, queue, num_elements));
2190 CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
2191 TYPE_ATOMIC_ULONG, useSVM);
2192 EXECUTE_TEST(error,
2193 test_ulong.Execute(deviceID, context, queue, num_elements));
2194 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
2195 {
2196 CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
2197 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2198 EXECUTE_TEST(
2199 error,
2200 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2201 CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
2202 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2203 EXECUTE_TEST(
2204 error,
2205 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2206 CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
2207 TYPE_ATOMIC_SIZE_T, useSVM);
2208 EXECUTE_TEST(
2209 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2210 CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
2211 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2212 EXECUTE_TEST(
2213 error,
2214 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2215 }
2216 else
2217 {
2218 CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
2219 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2220 EXECUTE_TEST(
2221 error,
2222 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2223 CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
2224 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2225 EXECUTE_TEST(
2226 error,
2227 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2228 CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
2229 TYPE_ATOMIC_SIZE_T, useSVM);
2230 EXECUTE_TEST(
2231 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2232 CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
2233 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2234 EXECUTE_TEST(
2235 error,
2236 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2237 }
2238 return error;
2239 }
2240
2241 int test_atomic_fetch_min(cl_device_id deviceID, cl_context context,
2242 cl_command_queue queue, int num_elements)
2243 {
2244 return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
2245 false);
2246 }
2247
2248 int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context,
2249 cl_command_queue queue, int num_elements)
2250 {
2251 return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
2252 true);
2253 }
2254
2255 template <typename HostAtomicType, typename HostDataType>
2256 class CBasicTestFetchMax
2257 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
2258 public:
2259 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
2260 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
2261 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
2262 using CBasicTestMemOrderScope<HostAtomicType,
2263 HostDataType>::MemoryOrderScopeStr;
2264     CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
2265 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
2266 useSVM)
2267 {
2268 StartValue(DataType().MinValue());
2269 }
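    // Mirror of the fetch_min test: start from the minimum value so the
    // expected result is the maximum of the generated reference values.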
2270     virtual std::string ProgramCore()
2271 {
2272 std::string memoryOrderScope = MemoryOrderScopeStr();
2273 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
2274 return " oldValues[tid] = atomic_fetch_max" + postfix
2275 + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
2276 }
2277     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
2278 volatile HostAtomicType *destMemory,
2279 HostDataType *oldValues)
2280 {
2281 oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid],
2282 MemoryOrder());
2283 }
2284     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
2285 MTdata d)
2286 {
2287 for (cl_uint i = 0; i < threadCount; i++)
2288 {
2289 startRefValues[i] = genrand_int32(d);
2290 if (sizeof(HostDataType) >= 8)
2291 startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
2292 }
2293 return true;
2294 }
2295     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2296 HostDataType *startRefValues,
2297 cl_uint whichDestValue)
2298 {
2299 expected = StartValue();
2300 for (cl_uint i = 0; i < threadCount; i++)
2301 {
2302 if (startRefValues[i] > expected) expected = startRefValues[i];
2303 }
2304 return true;
2305 }
2306 };
2307
2308 int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context,
2309 cl_command_queue queue, int num_elements,
2310 bool useSVM)
2311 {
2312 int error = 0;
2313 CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
2314 useSVM);
2315 EXECUTE_TEST(error,
2316 test_int.Execute(deviceID, context, queue, num_elements));
2317 CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
2318 useSVM);
2319 EXECUTE_TEST(error,
2320 test_uint.Execute(deviceID, context, queue, num_elements));
2321 CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
2322 useSVM);
2323 EXECUTE_TEST(error,
2324 test_long.Execute(deviceID, context, queue, num_elements));
2325 CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
2326 TYPE_ATOMIC_ULONG, useSVM);
2327 EXECUTE_TEST(error,
2328 test_ulong.Execute(deviceID, context, queue, num_elements));
2329 if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
2330 {
2331 CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
2332 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2333 EXECUTE_TEST(
2334 error,
2335 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2336 CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
2337 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2338 EXECUTE_TEST(
2339 error,
2340 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2341 CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
2342 TYPE_ATOMIC_SIZE_T, useSVM);
2343 EXECUTE_TEST(
2344 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2345 CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
2346 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2347 EXECUTE_TEST(
2348 error,
2349 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2350 }
2351 else
2352 {
2353 CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
2354 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
2355 EXECUTE_TEST(
2356 error,
2357 test_intptr_t.Execute(deviceID, context, queue, num_elements));
2358 CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
2359 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
2360 EXECUTE_TEST(
2361 error,
2362 test_uintptr_t.Execute(deviceID, context, queue, num_elements));
2363 CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
2364 TYPE_ATOMIC_SIZE_T, useSVM);
2365 EXECUTE_TEST(
2366 error, test_size_t.Execute(deviceID, context, queue, num_elements));
2367 CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
2368 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
2369 EXECUTE_TEST(
2370 error,
2371 test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
2372 }
2373 return error;
2374 }
2375
2376 int test_atomic_fetch_max(cl_device_id deviceID, cl_context context,
2377 cl_command_queue queue, int num_elements)
2378 {
2379 return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
2380 false);
2381 }
2382
2383 int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context,
2384 cl_command_queue queue, int num_elements)
2385 {
2386 return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
2387 true);
2388 }
2389
2390 template <typename HostAtomicType, typename HostDataType>
2391 class CBasicTestFlag
2392 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
2393 static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
2394
2395 public:
2396 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
2397 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
2398 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
2399 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
2400 using CBasicTestMemOrderScope<HostAtomicType,
2401 HostDataType>::MemoryOrderScopeStr;
2402 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
2403 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
2404     CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
2405 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
2406 useSVM)
2407 {
2408 StartValue(0);
2409 OldValueCheck(false);
2410 }
2411     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
2412 {
2413 return threadCount;
2414 }
2415     TExplicitMemoryOrderType MemoryOrderForClear()
2416 {
2417 // Memory ordering for atomic_flag_clear function
2418 // ("shall not be memory_order_acquire nor memory_order_acq_rel")
2419 if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
2420 if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
2421 return MemoryOrder();
2422 }
2423     std::string MemoryOrderScopeStrForClear()
2424 {
2425 std::string orderStr;
2426 if (MemoryOrder() != MEMORY_ORDER_EMPTY)
2427 orderStr = std::string(", ")
2428 + get_memory_order_type_name(MemoryOrderForClear());
2429 return orderStr + MemoryScopeStr();
2430 }
2431
2432     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
2433 cl_command_queue queue)
2434 {
2435         // This test assumes support for the memory_scope_device scope in the
2436         // case that LocalMemory() == false. Therefore we should skip this test
2437         // in that configuration on a 3.0 driver, since support for the
2438         // memory_scope_device scope is optional.
2439 if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
2440 {
2441 if (!LocalMemory()
2442 && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
2443 {
2444                 log_info("Skipping atomic_flag test due to use of "
2445                          "memory_scope_device, "
2446                          "which is optional and not supported on this device\n");
2447 return 0; // skip test - not applicable
2448 }
2449 }
2450 return CBasicTestMemOrderScope<
2451 HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
2452 queue);
2453 }
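    // The kernel treats destMemory as an array of flags guarding critical
    // sections: each thread scans the flags and the first thread to win a
    // section records its id there, so VerifyRefs() can check that every
    // section was visited by exactly one distinct thread.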
2454     virtual std::string ProgramCore()
2455 {
2456 std::string memoryOrderScope = MemoryOrderScopeStr();
2457 std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
2458 std::string program =
2459 " uint cnt, stop = 0;\n"
2460 " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread "
2461             "must find a critical section where it is the first visitor\n"
2462 " {\n"
2463 " bool set = atomic_flag_test_and_set"
2464 + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
2465 if (MemoryOrder() == MEMORY_ORDER_RELAXED
2466 || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory())
2467 program += " atomic_work_item_fence("
2468 + std::string(
2469 LocalMemory()
2470 ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
2471 : "CLK_GLOBAL_MEM_FENCE, ")
2472 + "memory_order_acquire,"
2473 + std::string(LocalMemory()
2474 ? "memory_scope_work_group"
2475 : (UseSVM() ? "memory_scope_all_svm_devices"
2476 : "memory_scope_device"))
2477 + ");\n";
2478
2479 program += " if (!set)\n"
2480 " {\n";
2481
2482 if (LocalMemory())
2483 program += " uint csIndex = "
2484 "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
2485 else
2486 program += " uint csIndex = cnt;\n";
2487
2488 std::ostringstream csNotVisited;
2489 csNotVisited << CRITICAL_SECTION_NOT_VISITED;
2490 program += " // verify that thread is the first visitor\n"
2491 " if(oldValues[csIndex] == "
2492 + csNotVisited.str()
2493 + ")\n"
2494 " {\n"
2495 " oldValues[csIndex] = tid; // set the winner id for this "
2496 "critical section\n"
2497 " stop = 1;\n"
2498 " }\n";
2499
2500 if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
2501 || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory())
2502 program += " atomic_work_item_fence("
2503 + std::string(
2504 LocalMemory()
2505 ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
2506 : "CLK_GLOBAL_MEM_FENCE, ")
2507 + "memory_order_release,"
2508 + std::string(LocalMemory()
2509 ? "memory_scope_work_group"
2510 : (UseSVM() ? "memory_scope_all_svm_devices"
2511 : "memory_scope_device"))
2512 + ");\n";
2513
2514 program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]"
2515 + MemoryOrderScopeStrForClear()
2516 + ");\n"
2517 " }\n"
2518 " }\n";
2519 return program;
2520 }
2521     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
2522 volatile HostAtomicType *destMemory,
2523 HostDataType *oldValues)
2524 {
2525 cl_uint cnt, stop = 0;
2526 for (cnt = 0; !stop && cnt < threadCount;
2527              cnt++) // each thread must find a critical section where it is
2528                     // the first visitor
2529 {
2530 if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
2531 {
2532 cl_uint csIndex = cnt;
2533                 // verify that this thread is the first visitor
2534 if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
2535 {
2536 oldValues[csIndex] =
2537                     tid; // set the winner id for this critical section
2538 stop = 1;
2539 }
2540 host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
2541 }
2542 }
2543 }
2544     virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
2545 HostDataType *startRefValues,
2546 cl_uint whichDestValue)
2547 {
2548 expected = StartValue();
2549 return true;
2550 }
2551     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
2552 MTdata d)
2553 {
2554 for (cl_uint i = 0; i < threadCount; i++)
2555 startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
2556 return true;
2557 }
2558     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
2559 HostDataType *refValues,
2560 HostAtomicType *finalValues)
2561 {
2562 correct = true;
2563 /* We are expecting unique values from 0 to threadCount-1 (each critical
2564 * section must be visited) */
2565 /* These values must be distributed across refValues array */
2566 std::vector<bool> tidFound(threadCount);
2567 cl_uint i;
2568
2569 for (i = 0; i < threadCount; i++)
2570 {
2571 cl_uint value = (cl_uint)refValues[i];
2572 if (value == CRITICAL_SECTION_NOT_VISITED)
2573 {
2574 // Special initial value
2575 log_error("ERROR: Critical section %u not visited\n", i);
2576 correct = false;
2577 return true;
2578 }
2579 if (value >= threadCount)
2580 {
2581 log_error(
2582                     "ERROR: Reference value at index %u outside of valid range! (%u)\n",
2583                     i, value);
2584 correct = false;
2585 return true;
2586 }
2587 if (tidFound[value])
2588 {
2589                 log_error("ERROR: Value (%u) occurred more than once\n",
2590 value);
2591 correct = false;
2592 return true;
2593 }
2594 tidFound[value] = true;
2595 }
2596 return true;
2597 }
2598 };
2599
2600 int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
2601 cl_command_queue queue, int num_elements,
2602 bool useSVM)
2603 {
2604 int error = 0;
2605 CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
2606 useSVM);
2607 EXECUTE_TEST(error,
2608 test_flag.Execute(deviceID, context, queue, num_elements));
2609 return error;
2610 }
2611
2612 int test_atomic_flag(cl_device_id deviceID, cl_context context,
2613 cl_command_queue queue, int num_elements)
2614 {
2615 return test_atomic_flag_generic(deviceID, context, queue, num_elements,
2616 false);
2617 }
2618
2619 int test_svm_atomic_flag(cl_device_id deviceID, cl_context context,
2620 cl_command_queue queue, int num_elements)
2621 {
2622 return test_atomic_flag_generic(deviceID, context, queue, num_elements,
2623 true);
2624 }
2625
2626 template <typename HostAtomicType, typename HostDataType>
2627 class CBasicTestFence
2628 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
2629 struct TestDefinition
2630 {
2631 bool op1IsFence;
2632 TExplicitMemoryOrderType op1MemOrder;
2633 bool op2IsFence;
2634 TExplicitMemoryOrderType op2MemOrder;
2635 };
2636
2637 public:
2638 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
2639 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
2640 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
2641 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
2642 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
2643 using CBasicTestMemOrderScope<HostAtomicType,
2644 HostDataType>::DeclaredInProgram;
2645 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
2646 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
2647 using CBasicTestMemOrderScope<HostAtomicType,
2648 HostDataType>::CurrentGroupSize;
2649 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
2650 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
2651 using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
2652     CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
2653 : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
2654 useSVM)
2655 {
2656 StartValue(0);
2657 OldValueCheck(false);
2658 }
2659     virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
2660 {
2661 return threadCount;
2662 }
2663     virtual cl_uint NumNonAtomicVariablesPerThread()
2664 {
2665 if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
2666 if (LocalMemory())
2667 {
2668 if (gIsEmbedded)
2669 {
2670 if (CurrentGroupSize() > 512) CurrentGroupSize(512);
2671 return 2; // 1KB of local memory required by spec. Clamp group
2672 // size to 512 and allow 2 variables per thread
2673 }
2674 else
2675 return 32 * 1024 / 8 / CurrentGroupSize()
2676 - 1; // 32KB of local memory required by spec
2677 }
2678 return 256;
2679 }
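    // Note: the LocalMemory() budgets follow the minimum local memory the
    // spec guarantees (1KB embedded, 32KB full profile) divided by the 8-byte
    // worst-case element size; the -1 presumably leaves headroom for other
    // local allocations.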
2680     virtual std::string SingleTestName()
2681 {
2682 std::string testName;
2683 if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
2684 testName += "seq_cst fence, ";
2685 else
2686 testName +=
2687 std::string(get_memory_order_type_name(_subCase.op1MemOrder))
2688 .substr(sizeof("memory_order"))
2689 + (_subCase.op1IsFence ? " fence" : " atomic")
2690 + " synchronizes-with "
2691 + std::string(get_memory_order_type_name(_subCase.op2MemOrder))
2692 .substr(sizeof("memory_order"))
2693 + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
2694 testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
2695 testName += std::string(", ")
2696 + std::string(get_memory_scope_type_name(MemoryScope()))
2697 .substr(sizeof("memory"));
2698 return testName;
2699 }
2700     virtual bool SVMDataBufferAllSVMConsistent()
2701 {
2702         // Although memory_scope_all_devices doesn't mention SVM, it is just
2703         // an alias for memory_scope_all_svm_devices, so on devices that
2704         // support SVM both scopes interact with SVM allocations the same way.
2705 return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
2706 || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
2707 }
2708     virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
2709 cl_context context,
2710 cl_command_queue queue)
2711 {
2712 int error = 0;
2713         // execute at most 3 sub-cases for each memory order
2714 for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
2715 {
2716 EXECUTE_TEST(
2717 error,
2718 (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
2719 ExecuteForEachParameterSet(deviceID, context, queue)));
2720 }
2721 return error;
2722 }
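    // Each sub-case pairs a release-side operation with an acquire-side
    // operation (either of which may be a fence) and checks that the pair
    // establishes a synchronizes-with relation for the memory order under
    // test.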
2723     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
2724 cl_command_queue queue)
2725 {
2726 if (DeclaredInProgram() || UsedInFunction())
2727 return 0; // skip test - not applicable - no overloaded fence
2728 // functions for different address spaces
2729 if (MemoryOrder() == MEMORY_ORDER_EMPTY
2730 || MemoryScope()
2731 == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since
2732 // opencl20-openclc-rev15
2733 return 0; // skip test - not applicable
2734 if ((UseSVM() || gHost) && LocalMemory())
2735 return 0; // skip test - not applicable for SVM and local memory
        struct TestDefinition acqTests[] = {
            // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
        };
        struct TestDefinition relTests[] = {
            { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
        };
        struct TestDefinition arTests[] = {
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
            { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
        };
        switch (MemoryOrder())
        {
            case MEMORY_ORDER_ACQUIRE:
                if (_subCaseId
                    >= sizeof(acqTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = acqTests[_subCaseId];
                break;
            case MEMORY_ORDER_RELEASE:
                if (_subCaseId
                    >= sizeof(relTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = relTests[_subCaseId];
                break;
            case MEMORY_ORDER_ACQ_REL:
                if (_subCaseId
                    >= sizeof(arTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = arTests[_subCaseId];
                break;
            case MEMORY_ORDER_SEQ_CST:
                if (_subCaseId != 0) // one special case only
                    return 0;
                break;
            default: return 0;
        }
        LocalRefValues(LocalMemory());
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramHeader(cl_uint maxNumDestItems)
    {
        std::string header;
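        // Legacy (OpenCL 1.x) compilation has no atomic_work_item_fence;
        // map it onto mem_fence, dropping the order (and scope) arguments.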
        if (gOldAPI)
        {
            if (MemoryScope() == MEMORY_SCOPE_EMPTY)
            {
                header += "#define atomic_work_item_fence(x,y) "
                          " mem_fence(x)\n";
            }
            else
            {
                header += "#define atomic_work_item_fence(x,y,z) "
                          " mem_fence(x)\n";
            }
        }
        return header
            + CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                ProgramHeader(maxNumDestItems);
    }
    virtual std::string ProgramCore()
    {
        std::ostringstream naValues;
        naValues << NumNonAtomicVariablesPerThread();
        std::string program, fenceType, nonAtomic;
        if (LocalMemory())
        {
            program = " size_t myId = get_local_id(0), hisId = "
                      "get_local_size(0)-1-myId;\n";
            fenceType = "CLK_LOCAL_MEM_FENCE";
            nonAtomic = "localValues";
        }
        else
        {
            program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
            fenceType = "CLK_GLOBAL_MEM_FENCE";
            nonAtomic = "oldValues";
        }
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the
            // following actions:
            // - increments its own variable
            // - performs a fence operation to propagate its value and to see
            //   the value from the other thread
            // - reads the value of the other thread's variable
            // - repeats the above steps while both values are the same (and
            //   less than 1000000)
            // - stores the last value read from the other thread (in an
            //   additional variable)
            // At the end of execution at least one thread must know the last
            // value written by the other thread.
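            // For illustration only (assuming atomic_int in global memory at
            // device scope), the emitted kernel core is roughly:
            //   int myValue = 0, hisValue;
            //   do {
            //     myValue++;
            //     atomic_store_explicit(&destMemory[myId], myValue,
            //       memory_order_relaxed, memory_scope_device);
            //     atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE,
            //       memory_order_seq_cst, memory_scope_device);
            //     hisValue = atomic_load_explicit(&destMemory[hisId],
            //       memory_order_relaxed, memory_scope_device);
            //   } while(myValue == hisValue && myValue < 1000000);
            //   oldValues[myId] = hisValue;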
            program += std::string("") + " " + DataType().RegularTypeName()
                + " myValue = 0, hisValue; \n"
                  " do {\n"
                  " myValue++;\n"
                  " atomic_store_explicit(&destMemory[myId], myValue, "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  " atomic_work_item_fence("
                + fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
                + "); \n"
                  " hisValue = atomic_load_explicit(&destMemory[hisId], "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  " } while(myValue == hisValue && myValue < 1000000);\n"
                  " "
                + nonAtomic + "[myId] = hisValue; \n";
        }
        else
        {
            // Each thread modifies one of its non-atomic variables, increments
            // the value of its atomic variable and reads values from another
            // thread in a typical synchronizes-with scenario:
            // - non-atomic variable (at index A) modification (value change
            //   from 0 to A)
            // - release operation (an additional fence or within the atomic)
            //   + atomic variable modification (value A)
            // - atomic variable read (value B) + acquire operation (an
            //   additional fence or within the atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies the dependency between the atomic and
            // non-atomic values read from the other thread. The following
            // condition must be true: B == C.
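            // For illustration only (assuming atomic_int in global memory at
            // device scope, 256 scratch variables per thread, and the
            // sub-case {op1: release atomic store, op2: acquire fence}), the
            // emitted loop body is roughly:
            //   myValue++;
            //   oldValues[myId*256+myValue] = myValue;
            //   atomic_store_explicit(&destMemory[myId], myValue,
            //     memory_order_release, memory_scope_device);
            //   hisAtomicValue = atomic_load_explicit(&destMemory[hisId],
            //     memory_order_relaxed, memory_scope_device);
            //   atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE,
            //     memory_order_acquire, memory_scope_device);
            //   hisValue = oldValues[hisId*256+hisAtomicValue];
            //   hisId = (hisId+1)%threadCount;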
            program += std::string("") + " " + DataType().RegularTypeName()
                + " myValue = 0, hisAtomicValue, hisValue; \n"
                  " do {\n"
                  " myValue++;\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue] = myValue;\n";
            if (_subCase.op1IsFence)
                program += std::string("") + " atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr()
                    + "); \n"
                      " atomic_store_explicit(&destMemory[myId], myValue, "
                      "memory_order_relaxed"
                    + MemoryScopeStr() + ");\n";
            else
                program += std::string("")
                    + " atomic_store_explicit(&destMemory[myId], myValue, "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr() + ");\n";
            if (_subCase.op2IsFence)
                program += std::string("")
                    + " hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                      "memory_order_relaxed"
                    + MemoryScopeStr()
                    + ");\n"
                      " atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + "); \n";
            else
                program += std::string("")
                    + " hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + ");\n";
            program += " hisValue = " + nonAtomic + "[hisId*"
                + naValues.str() + "+hisAtomicValue]; \n";
            if (LocalMemory())
                program += " hisId = (hisId+1)%get_local_size(0);\n";
            else
                program += " hisId = (hisId+1)%threadCount;\n";
            program += " } while(hisAtomicValue == hisValue && myValue < "
                + naValues.str()
                + "-1);\n"
                  " if(hisAtomicValue != hisValue)\n"
                  " { // fail\n"
                  " atomic_store_explicit(&destMemory[myId], myValue-1,"
                  " memory_order_relaxed, memory_scope_work_group);\n";
            if (LocalMemory())
                program += " hisId = "
                           "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
            else
                program += " hisId = (hisId+threadCount-1)%threadCount;\n";
            program += " if(myValue+1 < " + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+1] = hisId;\n"
                  " if(myValue+2 < "
                + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+2] = hisAtomicValue;\n"
                  " if(myValue+3 < "
                + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+3] = hisValue;\n";
            if (gDebug)
            {
                program += " printf(\"WI %d: atomic value (%d) at index %d "
                           "is different than non-atomic value (%d)\\n\", tid, "
                           "hisAtomicValue, hisId, hisValue);\n";
            }
            program += " }\n";
        }
        return program;
    }
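    // Host-side counterpart of ProgramCore: runs the same ping-pong
    // (seq_cst) or synchronizes-with protocol on a CPU thread when host
    // threads participate in the test.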
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t myId = tid, hisId = threadCount - 1 - tid;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            HostDataType myValue = 0, hisValue;
            // CPU thread typically starts faster - wait for the GPU thread
            myValue++;
            host_atomic_store<HostAtomicType, HostDataType>(
                &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
            while (host_atomic_load<HostAtomicType, HostDataType>(
                       &destMemory[hisId], MEMORY_ORDER_SEQ_CST)
                   == 0)
                ;
            do
            {
                myValue++;
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                host_atomic_thread_fence(MemoryOrder());
                hisValue = host_atomic_load<HostAtomicType, HostDataType>(
                    &destMemory[hisId], MEMORY_ORDER_RELAXED);
            } while (myValue == hisValue && hisValue < 1000000);
            oldValues[tid] = hisValue;
        }
        else
        {
            HostDataType myValue = 0, hisAtomicValue, hisValue;
            do
            {
                myValue++;
                oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
                    myValue;
                if (_subCase.op1IsFence)
                {
                    host_atomic_thread_fence(_subCase.op1MemOrder);
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                }
                else
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, _subCase.op1MemOrder);
                if (_subCase.op2IsFence)
                {
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], MEMORY_ORDER_RELAXED);
                    host_atomic_thread_fence(_subCase.op2MemOrder);
                }
                else
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], _subCase.op2MemOrder);
                hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
                                     + hisAtomicValue];
                hisId = (hisId + 1) % threadCount;
            } while (hisAtomicValue == hisValue
                     && myValue
                         < (HostDataType)NumNonAtomicVariablesPerThread() - 1);
            if (hisAtomicValue != hisValue)
            { // fail
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
                if (gDebug)
                {
                    hisId = (hisId + threadCount - 1) % threadCount;
                    printf("WI %d: atomic value (%d) at index %d is different "
                           "than non-atomic value (%d)\n",
                           tid, hisAtomicValue, hisId, hisValue);
                }
            }
        }
    }
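    // Non-atomic reference values all start at zero; the kernel/host code
    // then writes value == index into each slot, so a stale (pre-release)
    // read shows up as a mismatch against the atomic value.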
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread();
             i++)
            startRefValues[i] = 0;
        return true;
    }
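    // Verification: for the seq_cst sub-case the slower thread of each pair
    // must have observed the faster thread's final counter value; for the
    // synchronizes-with sub-cases every thread must have completed all
    // NumNonAtomicVariablesPerThread()-1 iterations without a mismatch.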
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
        for (cl_uint workOffset = 0; workOffset < threadCount;
             workOffset += workSize)
        {
            if (workOffset + workSize > threadCount)
                // last workgroup (host threads)
                workSize = threadCount - workOffset;
            for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
                 i++)
            {
                HostAtomicType myValue = finalValues[workOffset + i];
                if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
                {
                    HostDataType hisValue = refValues[workOffset + i];
                    if (myValue == hisValue)
                    {
                        // a draw - both threads should reach the final value
                        // 1000000
                        if (myValue != 1000000)
                        {
                            log_error("ERROR: Invalid reference value #%u (%d "
                                      "instead of 1000000)\n",
                                      workOffset + i, myValue);
                            correct = false;
                            return true;
                        }
                    }
                    else
                    {
                        // the slower thread (in the total order of seq_cst
                        // operations) must know the last value written by the
                        // faster thread
                        HostAtomicType hisRealValue =
                            finalValues[workOffset + workSize - 1 - i];
                        HostDataType myValueReadByHim =
                            refValues[workOffset + workSize - 1 - i];

                        // who is the winner? - the thread with the lower
                        // private counter value
                        if (myValue == hisRealValue) // forbidden result - the
                                                     // fence doesn't work
                        {
                            log_error("ERROR: Atomic counter values #%u and "
                                      "#%u are the same (%u)\n",
                                      workOffset + i,
                                      workOffset + workSize - 1 - i, myValue);
                            log_error(
                                "ERROR: Both threads have outdated values read "
                                "from another thread (%u and %u)\n",
                                hisValue, myValueReadByHim);
                            correct = false;
                            return true;
                        }
                        if (myValue > hisRealValue) // I'm slower
                        {
                            if (hisRealValue != hisValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + i, hisValue,
                                          hisRealValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + i,
                                    workOffset + workSize - 1 - i);
                                correct = false;
                                return true;
                            }
                        }
                        else // I'm faster
                        {
                            if (myValueReadByHim != myValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + workSize - 1 - i,
                                          myValueReadByHim, myValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + workSize - 1 - i,
                                    workOffset + i);
                                correct = false;
                                return true;
                            }
                        }
                    }
                }
                else
                {
                    if (myValue != NumNonAtomicVariablesPerThread() - 1)
                    {
                        log_error("ERROR: Invalid atomic value #%u (%d instead "
                                  "of %d)\n",
                                  workOffset + i, myValue,
                                  NumNonAtomicVariablesPerThread() - 1);
                        log_error("ERROR: Thread #%u observed invalid values "
                                  "in other thread's variables\n",
                                  workOffset + i);
                        correct = false;
                        return true;
                    }
                }
            }
        }
        return true;
    }

private:
    size_t _subCaseId;
    struct TestDefinition _subCase;
};

int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements,
                              bool useSVM)
{
    int error = 0;
    CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
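    // Pointer-sized types: pick 32- or 64-bit host representations to match
    // the device's size_t width.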
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

int test_atomic_fence(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue, int num_elements)
{
    return test_atomic_fence_generic(deviceID, context, queue, num_elements,
                                     false);
}

int test_svm_atomic_fence(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    return test_atomic_fence_generic(deviceID, context, queue, num_elements,
                                     true);
}