/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <map>
#include <sstream>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::MovePtr;
using de::UniquePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
    BUFFER = 0, // Normal buffer.
    SHARED,     // Shared global struct in a compute workgroup.
    REFERENCE,  // Buffer passed as a reference.
    PAYLOAD,    // Task payload.
};

// Helper class encoding the shader stage and the memory type the atomic operations will use.
class AtomicShaderType
{
public:
    AtomicShaderType(glu::ShaderType type, AtomicMemoryType memoryType) : m_type(type), m_atomicMemoryType(memoryType)
    {
        // Shared global memory can only be used with compute, task and mesh shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE ||
                  type == glu::SHADERTYPE_TASK || type == glu::SHADERTYPE_MESH);

        // Task payload memory can only be tested in task shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::PAYLOAD || type == glu::SHADERTYPE_TASK);
    }

    glu::ShaderType getType(void) const
    {
        return m_type;
    }
    AtomicMemoryType getMemoryType(void) const
    {
        return m_atomicMemoryType;
    }
    bool isSharedLike(void) const
    {
        return m_atomicMemoryType == AtomicMemoryType::SHARED || m_atomicMemoryType == AtomicMemoryType::PAYLOAD;
    }
    bool isMeshShadingStage(void) const
    {
        return (m_type == glu::SHADERTYPE_TASK || m_type == glu::SHADERTYPE_MESH);
    }

private:
    glu::ShaderType m_type;
    AtomicMemoryType m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
    Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef);

    VkBuffer getBuffer(void) const
    {
        return *m_buffer;
    }
    void *getHostPtr(void) const
    {
        return m_allocation->getHostPtr();
    }
    void flush(void);
    void invalidate(void);

private:
    const DeviceInterface &m_vkd;
    const VkDevice m_device;
    const VkQueue m_queue;
    const uint32_t m_queueIndex;
    const Unique<VkBuffer> m_buffer;
    const UniquePtr<Allocation> m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer(const DeviceInterface &vkd, VkDevice device, VkDeviceSize size,
                            VkBufferUsageFlags usageFlags)
{
    const VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                           DE_NULL,
                                           (VkBufferCreateFlags)0,
                                           size,
                                           usageFlags,
                                           VK_SHARING_MODE_EXCLUSIVE,
                                           0u,
                                           DE_NULL};
    return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                                          VkBuffer buffer, bool useRef)
{
    const MemoryRequirement allocationType =
        (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
    MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

    VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

    return alloc;
}

Buffer::Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef)
    : m_vkd(context.getDeviceInterface())
    , m_device(context.getDevice())
    , m_queue(context.getUniversalQueue())
    , m_queueIndex(context.getUniversalQueueFamilyIndex())
    , m_buffer(createBuffer(context.getDeviceInterface(), context.getDevice(), (VkDeviceSize)size, usage))
    , m_allocation(allocateAndBindMemory(context.getDeviceInterface(), context.getDevice(),
                                         context.getDefaultAllocator(), *m_buffer, useRef))
{
}

void Buffer::flush(void)
{
    flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

void Buffer::invalidate(void)
{
    const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
    const auto cmdBufferPtr =
        vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer = cmdBufferPtr.get();
    const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
                                                           m_buffer.get(), 0ull, VK_WHOLE_SIZE);

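    // Record and submit a one-time barrier so that device writes to the buffer (the
    // shader's atomic results) are made available to host reads before the mapped
    // memory range is invalidated below.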
    beginCommandBuffer(m_vkd, cmdBuffer);
    m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u,
                             nullptr, 1u, &bufferBarrier, 0u, nullptr);
    endCommandBuffer(m_vkd, cmdBuffer);
    submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

    invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
    ATOMIC_OP_EXCHANGE = 0,
    ATOMIC_OP_COMP_SWAP,
    ATOMIC_OP_ADD,
    ATOMIC_OP_MIN,
    ATOMIC_OP_MAX,
    ATOMIC_OP_AND,
    ATOMIC_OP_OR,
    ATOMIC_OP_XOR,

    ATOMIC_OP_LAST
};

std::string atomicOp2Str(AtomicOperation op)
{
    static const char *const s_names[] = {"atomicExchange", "atomicCompSwap", "atomicAdd", "atomicMin",
                                          "atomicMax",      "atomicAnd",      "atomicOr",  "atomicXor"};
    return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
    NUM_ELEMENTS = 32
};

enum DataType
{
    DATA_TYPE_FLOAT16 = 0,
    DATA_TYPE_INT32,
    DATA_TYPE_UINT32,
    DATA_TYPE_FLOAT32,
    DATA_TYPE_INT64,
    DATA_TYPE_UINT64,
    DATA_TYPE_FLOAT64,

    DATA_TYPE_LAST
};

std::string dataType2Str(DataType type)
{
    static const char *const s_names[] = {
        "float16_t", "int", "uint", "float", "int64_t", "uint64_t", "double",
    };
    return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
    virtual void setBuffer(void *ptr) = 0;

    virtual size_t bufferSize() = 0;

    virtual void fillWithTestData(de::Random &rnd) = 0;

    virtual void checkResults(tcu::ResultCollector &resultCollector) = 0;

    virtual ~BufferInterface()
    {
    }
};

template <typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:
    TestBuffer(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
    {
    }

    template <typename T>
    struct BufferData
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T inout[NUM_ELEMENTS / 2];
        T input[NUM_ELEMENTS];
        T compare[NUM_ELEMENTS];
        T output[NUM_ELEMENTS];
        // Must stay int32_t regardless of T: the GLSL struct declares these two
        // members as plain "int", and the layouts have to match byte for byte.
        int32_t invocationHitCount[NUM_ELEMENTS];
        int32_t index;
    };
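
    // For example, with NUM_ELEMENTS == 32, inout[3] is targeted by two atomic
    // operations: one using input[3] (previous value recorded in output[3]) and one
    // using input[19] (previous value recorded in output[19]).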

    virtual void setBuffer(void *ptr)
    {
        m_ptr = static_cast<BufferData<dataTypeT> *>(ptr);
    }

    virtual size_t bufferSize()
    {
        return sizeof(BufferData<dataTypeT>);
    }

    virtual void fillWithTestData(de::Random &rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
            // The first half of compare elements match with every even index.
            // The second half matches with odd indices. This causes the
            // overlapping operations to only select one.
            m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
            m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
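            // E.g. for i == 0: compare[0] == inout[0], so that swap can succeed, while
            // compare[16] == inout[0] + 1, so at most one of the two overlapping
            // atomicCompSwap operations replaces the original value.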
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }
        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults(tcu::ResultCollector &resultCollector)
    {
        checkOperation(m_original, *m_ptr, resultCollector);
    }

    template <typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected(T inout, T output0, T output1) : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare(T inout, T output0, T output1)
        {
            return (deMemCmp((const void *)&m_inout, (const void *)&inout, sizeof(inout)) == 0 &&
                    deMemCmp((const void *)&m_output[0], (const void *)&output0, sizeof(output0)) == 0 &&
                    deMemCmp((const void *)&m_output[1], (const void *)&output1, sizeof(output1)) == 0);
        }
    };

    void checkOperation(const BufferData<dataTypeT> &original, const BufferData<dataTypeT> &result,
                        tcu::ResultCollector &resultCollector);

    const AtomicOperation m_atomicOp;

    BufferData<dataTypeT> *m_ptr;
    BufferData<dataTypeT> m_original;
};

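// Equality helper for floating-point results: any two NaNs compare equal (signaling
// bit and payload are ignored), a NaN never equals a non-NaN, and remaining values are
// compared with a small absolute tolerance rather than exact bit equality.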
template <typename T>
bool nanSafeSloppyEquals(T x, T y)
{
    if (deIsIEEENaN(x) && deIsIEEENaN(y))
        return true;

    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return false;

    return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}

template <typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:
    TestBufferFloatingPoint(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
    {
    }

    template <typename T>
    struct BufferDataFloatingPoint
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T inout[NUM_ELEMENTS / 2];
        T input[NUM_ELEMENTS];
        T compare[NUM_ELEMENTS];
        T output[NUM_ELEMENTS];
        int32_t invocationHitCount[NUM_ELEMENTS];
        int32_t index;
    };

    virtual void setBuffer(void *ptr)
    {
        m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT> *>(ptr);
    }

    virtual size_t bufferSize()
    {
        return sizeof(BufferDataFloatingPoint<dataTypeT>);
    }

    virtual void fillWithTestData(de::Random &rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
            // These aren't used by any of the float tests.
            m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }

        // Add special cases for NaN and +/-0. These must be written after the random
        // fill above; otherwise the loop would overwrite the special input values.
        // 0: min(sNaN, x)
        m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
        // 1: min(x, sNaN)
        m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
        // 2: min(qNaN, x)
        m_ptr->inout[2] = deQuietNaN<dataTypeT>();
        // 3: min(x, qNaN)
        m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
        // 4: min(NaN, NaN)
        m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
        // 5: min(+0, -0)
        m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
        m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
        m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);

        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults(tcu::ResultCollector &resultCollector)
    {
        checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
    }

    template <typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected(T inout, T output0, T output1) : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare(T inout, T output0, T output1)
        {
            return nanSafeSloppyEquals(m_inout, inout) && nanSafeSloppyEquals(m_output[0], output0) &&
                   nanSafeSloppyEquals(m_output[1], output1);
        }
    };

    void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT> &original,
                                     const BufferDataFloatingPoint<dataTypeT> &result,
                                     tcu::ResultCollector &resultCollector);

    const AtomicOperation m_atomicOp;

    BufferDataFloatingPoint<dataTypeT> *m_ptr;
    BufferDataFloatingPoint<dataTypeT> m_original;
};

static BufferInterface *createTestBuffer(DataType type, AtomicOperation atomicOp)
{
    switch (type)
    {
    case DATA_TYPE_FLOAT16:
        return new TestBufferFloatingPoint<deFloat16>(atomicOp);
    case DATA_TYPE_INT32:
        return new TestBuffer<int32_t>(atomicOp);
    case DATA_TYPE_UINT32:
        return new TestBuffer<uint32_t>(atomicOp);
    case DATA_TYPE_FLOAT32:
        return new TestBufferFloatingPoint<float>(atomicOp);
    case DATA_TYPE_INT64:
        return new TestBuffer<int64_t>(atomicOp);
    case DATA_TYPE_UINT64:
        return new TestBuffer<uint64_t>(atomicOp);
    case DATA_TYPE_FLOAT64:
        return new TestBufferFloatingPoint<double>(atomicOp);
    default:
        DE_ASSERT(false);
        return DE_NULL;
    }
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template <typename T>
void TestBuffer<T>::checkOperation(const BufferData<T> &original, const BufferData<T> &result,
                                   tcu::ResultCollector &resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the
    // operation, and this is stored as output. Two operations are
    // executed for each InOut value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the
    // resulting InOut value and the outputs of the operations each
    // have two result candidates depending on the execution order.
    // Verification passes if the results match one of these options.

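    // Worked example (hypothetical values, ATOMIC_OP_ADD): originalInout = 1,
    // input0 = 2, input1 = 4. If input0 is applied first: output0 = 1, output1 = 3,
    // final inout = 7. If input1 is applied first: output1 = 1, output0 = 5,
    // final inout = 7. Either (7, 1, 3) or (7, 5, 1) passes the check below.
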
    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx]);
        const T input0 = *reinterpret_cast<const T *>(&original.input[elementNdx]);
        const T input1 = *reinterpret_cast<const T *>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T>> exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
        }
        break;

        case ATOMIC_OP_AND:
        {
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
        }
        break;

        case ATOMIC_OP_OR:
        {
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
        }
        break;

        case ATOMIC_OP_XOR:
        {
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout,
                                      de::min(originalInout, input0)));
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1),
                                      originalInout));
        }
        break;

        case ATOMIC_OP_MAX:
        {
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout,
                                      de::max(originalInout, input0)));
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1),
                                      originalInout));
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        case ATOMIC_OP_COMP_SWAP:
        {
            if (elementNdx % 2 == 0)
            {
                exp.push_back(Expected<T>(input0, originalInout, input0));
                exp.push_back(Expected<T>(input0, originalInout, originalInout));
            }
            else
            {
                exp.push_back(Expected<T>(input1, input1, originalInout));
                exp.push_back(Expected<T>(input1, originalInout, originalInout));
            }
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[0].m_output[0])
                         << ", Output1 = " << tcu::toHex(exp[0].m_output[1])
                         << ", or InOut = " << tcu::toHex(exp[1].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[1].m_output[0])
                         << ", Output1 = " << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
                         << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = " << tcu::toHex(resOutput1)
                         << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
                         << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

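// Collects the admissible results of a floating-point min/max when either operand is a
// NaN or the operands are zeros of opposite sign. For the ordinary case the vector is
// left empty and the caller falls back to a plain comparison.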
template <typename T>
void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
{
    if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
    }
    else if (deIsSignalingNaN(x))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(y))
            values.push_back(y);
    }
    else if (deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(x))
            values.push_back(x);
    }
    else if (deIsIEEENaN(x) && deIsIEEENaN(y))
    {
        // Both quiet NaNs
        values.push_back(deQuietNaN<T>());
    }
    else if (deIsIEEENaN(x))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(y);
    }
    else if (deIsIEEENaN(y))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(x);
    }
    else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
    {
        values.push_back(deToFloatType<T>(0.0));
        values.push_back(deToFloatType<T>(-0.0));
    }
}

template <typename T>
T floatAdd(T x, T y)
{
    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return deQuietNaN<T>();
    return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}

template <typename T>
vector<T> floatMinValues(T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
    }
    return values;
}

template <typename T>
vector<T> floatMaxValues(T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
    }
    return values;
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template <typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T> &original,
                                                             const BufferDataFloatingPoint<T> &result,
                                                             tcu::ResultCollector &resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the
    // operation, and this is stored as output. Two operations are
    // executed for each InOut value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the
    // resulting InOut value and the outputs of the operations have
    // several result candidates depending on the execution order and
    // the NaN handling. Verification passes if the results match one
    // of these options.

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as floating-point values.
        const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx]);
        const T input0 = *reinterpret_cast<const T *>(&original.input[elementNdx]);
        const T input1 = *reinterpret_cast<const T *>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T>> exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout,
                                      floatAdd(originalInout, input0)));
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input1), input0),
                                      floatAdd(originalInout, input1), originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            // The case where input0 is combined first
            vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
            for (T x : minOriginalAndInput0)
            {
                vector<T> minAll = floatMinValues(x, input1);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
            for (T x : minOriginalAndInput1)
            {
                vector<T> minAll = floatMinValues(x, input0);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_MAX:
        {
            // The case where input0 is combined first
            vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
            for (T x : maxOriginalAndInput0)
            {
                vector<T> maxAll = floatMaxValues(x, input1);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
            for (T x : maxOriginalAndInput1)
            {
                vector<T> maxAll = floatMaxValues(x, input0);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        bool hasMatch = false;
        for (Expected<T> e : exp)
        {
            if (e.compare(resIo, resOutput0, resOutput1))
            {
                hasMatch = true;
                break;
            }
        }
        if (!hasMatch)
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the outcomes:";

            bool first = true;
            for (Expected<T> e : exp)
            {
                if (!first)
                    errorMessage << ", or";
                first = false;

                errorMessage << " InOut = " << e.m_inout << ", Output0 = " << e.m_output[0]
                             << ", Output1 = " << e.m_output[1];
            }

            errorMessage << ". Got: InOut = " << resIo << ", Output0 = " << resOutput0 << ", Output1 = " << resOutput1
                         << ". Using Input0 = " << original.input[elementNdx]
                         << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
    AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec, AtomicShaderType shaderType,
                                DataType dataType, AtomicOperation atomicOp);

    virtual tcu::TestStatus iterate(void);

private:
    const ShaderSpec &m_shaderSpec;
    AtomicShaderType m_shaderType;
    const DataType m_dataType;
    AtomicOperation m_atomicOp;
};

AtomicOperationCaseInstance::AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec,
                                                         AtomicShaderType shaderType, DataType dataType,
                                                         AtomicOperation atomicOp)
    : TestInstance(context)
    , m_shaderSpec(shaderSpec)
    , m_shaderType(shaderType)
    , m_dataType(dataType)
    , m_atomicOp(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
    de::UniquePtr<BufferInterface> testBuffer(createTestBuffer(m_dataType, m_atomicOp));
    tcu::TestLog &log = m_context.getTestContext().getLog();
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    de::Random rnd(0x62a15e34);
    const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
    const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const VkBufferUsageFlags usageFlags =
        (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
         (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

    // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly
    // passed as part of a uniform buffer. If not, it will be passed directly as a descriptor.
    Buffer buffer(m_context, usageFlags, testBuffer->bufferSize(), useRef);
    std::unique_ptr<Buffer> auxBuffer;

    if (useRef)
    {
        // Pass the main buffer address inside a uniform buffer.
        const VkBufferDeviceAddressInfo addressInfo = {
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            nullptr,                                      // const void* pNext;
            buffer.getBuffer(),                           // VkBuffer buffer;
        };
        const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

        auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
        deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
        auxBuffer->flush();
    }

    testBuffer->setBuffer(buffer.getHostPtr());
    testBuffer->fillWithTestData(rnd);

    buffer.flush();

    Move<VkDescriptorSetLayout> extraResourcesLayout;
    Move<VkDescriptorPool> extraResourcesSetPool;
    Move<VkDescriptorSet> extraResourcesSet;

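    // A single descriptor is enough: it holds either the storage buffer itself or, in
    // the buffer-reference variant, the uniform buffer carrying its device address.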
    const VkDescriptorSetLayoutBinding bindings[] = {{0u, descType, 1u, VK_SHADER_STAGE_ALL, DE_NULL}};

    const VkDescriptorSetLayoutCreateInfo layoutInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, DE_NULL,
                                                        (VkDescriptorSetLayoutCreateFlags)0u,
                                                        DE_LENGTH_OF_ARRAY(bindings), bindings};

    extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

    const VkDescriptorPoolSize poolSizes[] = {{descType, 1u}};

    const VkDescriptorPoolCreateInfo poolInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        DE_NULL,
        (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        1u, // maxSets
        DE_LENGTH_OF_ARRAY(poolSizes),
        poolSizes};

    extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

    const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
                                                   *extraResourcesSetPool, 1u, &extraResourcesLayout.get()};

    extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

    VkDescriptorBufferInfo bufferInfo;
    bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
    bufferInfo.offset = 0u;
    bufferInfo.range = VK_WHOLE_SIZE;

    const VkWriteDescriptorSet descriptorWrite = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                  DE_NULL,
                                                  *extraResourcesSet,
                                                  0u, // dstBinding
                                                  0u, // dstArrayElement
                                                  1u,
                                                  descType,
                                                  (const VkDescriptorImageInfo *)DE_NULL,
                                                  &bufferInfo,
                                                  (const VkBufferView *)DE_NULL};

    vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

    // Storage for output varying data.
    std::vector<uint32_t> outputs(NUM_ELEMENTS);
    std::vector<void *> outputPtr(NUM_ELEMENTS);

    for (size_t i = 0; i < NUM_ELEMENTS; i++)
    {
        outputs[i] = 0xcdcdcdcd;
        outputPtr[i] = &outputs[i];
    }

    const int numWorkGroups = (m_shaderType.isSharedLike() ? 1 : static_cast<int>(NUM_ELEMENTS));
    UniquePtr<ShaderExecutor> executor(
        createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

    executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
    buffer.invalidate();

    tcu::ResultCollector resultCollector(log);

    // Check the results of the atomic operation.
    testBuffer->checkResults(resultCollector);

    return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
    AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType type, DataType dataType,
                        AtomicOperation atomicOp);
    virtual ~AtomicOperationCase(void);

    virtual TestInstance *createInstance(Context &ctx) const;
    virtual void checkSupport(Context &ctx) const;
    virtual void initPrograms(vk::SourceCollections &programCollection) const
    {
        const bool useSpv14 = m_shaderType.isMeshShadingStage();
        const auto spvVersion = (useSpv14 ? vk::SPIRV_VERSION_1_4 : vk::SPIRV_VERSION_1_0);
        const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spvVersion, 0u, useSpv14);
        ShaderSpec sourcesSpec(m_shaderSpec);

        sourcesSpec.buildOptions = buildOptions;
        generateSources(m_shaderType.getType(), sourcesSpec, programCollection);
    }

private:
    void createShaderSpec();
    ShaderSpec m_shaderSpec;
    const AtomicShaderType m_shaderType;
    const DataType m_dataType;
    const AtomicOperation m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType shaderType,
                                         DataType dataType, AtomicOperation atomicOp)
    : TestCase(testCtx, name)
    , m_shaderType(shaderType)
    , m_dataType(dataType)
    , m_atomicOp(atomicOp)
{
    createShaderSpec();
    init();
}

AtomicOperationCase::~AtomicOperationCase(void)
{
}

TestInstance *AtomicOperationCase::createInstance(Context &ctx) const
{
    return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport(Context &ctx) const
{
    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

        const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
        const bool isSharedMemory = m_shaderType.isSharedLike();

        if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError,
                      "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
        }
        if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError,
                      "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT16)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
                }
            }
        }
#endif // CTS_USES_VULKANSC
    }

    if (m_dataType == DATA_TYPE_FLOAT32)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT64)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
    {
        ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
    }

    checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec(void)
{
    const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
    const bool isSharedLike = m_shaderType.isSharedLike();

    // Global declarations.
    std::ostringstream shaderTemplateGlobalStream;

    // Structure in use for atomic operations.
    shaderTemplateGlobalStream << "${EXTENSIONS}\n"
                               << "\n"
                               << "struct AtomicStruct\n"
                               << "{\n"
                               << "    ${DATATYPE} inoutValues[${N}/2];\n"
                               << "    ${DATATYPE} inputValues[${N}];\n"
                               << "    ${DATATYPE} compareValues[${N}];\n"
                               << "    ${DATATYPE} outputValues[${N}];\n"
                               << "    int invocationHitCount[${N}];\n"
                               << "    int index;\n"
                               << "};\n"
                               << "\n";

    // The name dance and declarations below will make sure the structure that will be used with atomic operations
    // can be accessed as "buf.data", which is the name used in the atomic operation statements.
    //
    // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
    // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
    // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
    //
1291 {
1292 shaderTemplateGlobalStream << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
1293 << " AtomicStruct data;\n"
1294 << "} ${RESULT_BUFFER_NAME};\n"
1295 << "\n";
1296
1297 // When using global shared memory in the compute, task or mesh variants, invocations will use a shared global structure
1298 // instead of a descriptor set as the sources and results of each tested operation.
1299 if (memoryType == AtomicMemoryType::SHARED)
1300 {
1301 shaderTemplateGlobalStream << "shared struct { AtomicStruct data; } buf;\n"
1302 << "\n";
1303 }
1304 else if (memoryType == AtomicMemoryType::PAYLOAD)
1305 {
1306 shaderTemplateGlobalStream << "struct TaskData { AtomicStruct data; };\n"
1307 << "taskPayloadSharedEXT TaskData buf;\n";
1308 }
1309 }
1310 else
1311 {
1312 shaderTemplateGlobalStream << "layout (buffer_reference) buffer AtomicBuffer {\n"
1313 << " AtomicStruct data;\n"
1314 << "};\n"
1315 << "\n"
1316 << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
1317 << " AtomicBuffer buf;\n"
1318 << "};\n"
1319 << "\n";
1320 }
1321
1322 const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
1323 const tcu::StringTemplate shaderTemplateGlobal(shaderTemplateGlobalString);
1324
1325 // Shader body for the non-vertex case.
1326 std::ostringstream nonVertexShaderTemplateStream;
1327
1328 if (isSharedLike)
1329 {
1330 // Invocation zero will initialize the shared structure from the descriptor set.
1331 nonVertexShaderTemplateStream << "if (gl_LocalInvocationIndex == 0u)\n"
1332 << "{\n"
1333 << " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
1334 << "}\n"
1335 << "barrier();\n";
1336 }
1337
1338 if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
1339 {
1340 nonVertexShaderTemplateStream << "if (!gl_HelperInvocation) {\n"
1341 << " int idx = atomicAdd(buf.data.index, 1);\n"
1342 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
1343 "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1344 << "}\n";
1345 }
1346 else
1347 {
1348 nonVertexShaderTemplateStream << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
1349 << "{\n"
1350 << " int idx = atomicAdd(buf.data.index, 1);\n"
1351 << " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
1352 "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1353 << "}\n";
1354 }
1355
1356 if (isSharedLike)
1357 {
1358 // Invocation zero will copy results back to the descriptor set.
1359 nonVertexShaderTemplateStream << "barrier();\n"
1360 << "if (gl_LocalInvocationIndex == 0u)\n"
1361 << "{\n"
1362 << " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
1363 << "}\n";
1364 }
1365
1366 const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
1367 const tcu::StringTemplate nonVertexShaderTemplateSrc(nonVertexShaderTemplateStreamStr);
1368
1369 // Shader body for the vertex case.
1370 const tcu::StringTemplate vertexShaderTemplateSrc(
1371 "int idx = gl_VertexIndex;\n"
1372 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1373 "{\n"
1374 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], "
1375 "${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1376 "}\n");

    // Extensions.
    std::ostringstream extensions;

    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
                   << "#extension GL_EXT_shader_atomic_int64 : enable\n";
    }
    else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) ||
             (m_dataType == DATA_TYPE_FLOAT64))
    {
        extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
                   << "#extension GL_EXT_shader_atomic_float : enable\n"
                   << "#extension GL_EXT_shader_atomic_float2 : enable\n"
                   << "#extension GL_KHR_memory_scope_semantics : enable\n";
    }

    if (memoryType == AtomicMemoryType::REFERENCE)
    {
        extensions << "#extension GL_EXT_buffer_reference : require\n";
    }

    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"] = extensions.str();
    specializations["DATATYPE"] = dataType2Str(m_dataType);
    specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"] = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = (isSharedLike ? "result" : "buf");

    // Shader spec.
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source =
        ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX) ? vertexShaderTemplateSrc.specialize(specializations) :
                                                              nonVertexShaderTemplateSrc.specialize(specializations));

    if (isSharedLike)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}
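
// As an illustration, with DATATYPE = uint and ATOMICOP = atomicAdd, the core statement
// of the generated shader specializes to:
//
//   buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);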

void addAtomicOperationTests(tcu::TestCaseGroup *atomicOperationTestsGroup)
{
    tcu::TestContext &testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char *name;
    } shaderTypes[] = {
        {glu::SHADERTYPE_VERTEX, "vertex"},
        {glu::SHADERTYPE_FRAGMENT, "fragment"},
        {glu::SHADERTYPE_GEOMETRY, "geometry"},
        {glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl"},
        {glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval"},
        {glu::SHADERTYPE_COMPUTE, "compute"},
        {glu::SHADERTYPE_TASK, "task"},
        {glu::SHADERTYPE_MESH, "mesh"},
    };

    static const struct
    {
        AtomicMemoryType type;
        const char *suffix;
    } kMemoryTypes[] = {
        {AtomicMemoryType::BUFFER, ""},
        {AtomicMemoryType::SHARED, "_shared"},
        {AtomicMemoryType::REFERENCE, "_reference"},
        {AtomicMemoryType::PAYLOAD, "_payload"},
    };

    static const struct
    {
        DataType dataType;
        const char *name;
    } dataSign[] = {
#ifndef CTS_USES_VULKANSC
        // Tests using 16-bit float data
        {DATA_TYPE_FLOAT16, "float16"},
#endif // CTS_USES_VULKANSC
        // Tests using signed data (int)
        {DATA_TYPE_INT32, "signed"},
        // Tests using unsigned data (uint)
        {DATA_TYPE_UINT32, "unsigned"},
        // Tests using 32-bit float data
        {DATA_TYPE_FLOAT32, "float32"},
        // Tests using 64-bit signed data (int64)
        {DATA_TYPE_INT64, "signed64bit"},
        // Tests using 64-bit unsigned data (uint64)
        {DATA_TYPE_UINT64, "unsigned64bit"},
        // Tests using 64-bit float data
        {DATA_TYPE_FLOAT64, "float64"}};

    static const struct
    {
        AtomicOperation value;
        const char *name;
    } atomicOp[] = {{ATOMIC_OP_EXCHANGE, "exchange"},
                    {ATOMIC_OP_COMP_SWAP, "comp_swap"},
                    {ATOMIC_OP_ADD, "add"},
                    {ATOMIC_OP_MIN, "min"},
                    {ATOMIC_OP_MAX, "max"},
                    {ATOMIC_OP_AND, "and"},
                    {ATOMIC_OP_OR, "or"},
                    {ATOMIC_OP_XOR, "xor"}};

    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Only ADD, MIN, MAX and EXCHANGE are supported on floating-point types
                // (MIN and MAX only outside VulkanSC builds).
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 ||
                    dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
#ifndef CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_MIN && atomicOp[opNdx].value != ATOMIC_OP_MAX &&
#endif // CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute, task and mesh shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_MESH)
                        continue;

                    // Payload memory is only available for atomics in task shaders (in mesh shaders it's read-only).
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::PAYLOAD &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK)
                        continue;

                    const std::string name =
                        std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" +
                        std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(
                        testCtx, name.c_str(),
                        AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type),
                        dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // namespace

tcu::TestCaseGroup *createAtomicOperationTests(tcu::TestContext &testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", addAtomicOperationTests);
}

} // namespace shaderexecutor
} // namespace vkt