/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>
#include <map>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::MovePtr;
using de::UniquePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
    BUFFER = 0, // Normal buffer.
    SHARED,     // Shared global struct in a compute workgroup.
    REFERENCE,  // Buffer passed as a reference.
    PAYLOAD,    // Task payload.
};

// Helper class to indicate the shader type and which memory type the atomic operations will use.
class AtomicShaderType
{
public:
    AtomicShaderType(glu::ShaderType type, AtomicMemoryType memoryType) : m_type(type), m_atomicMemoryType(memoryType)
    {
        // Shared memory can only be used with compute, task and mesh shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE ||
                  type == glu::SHADERTYPE_TASK || type == glu::SHADERTYPE_MESH);

        // Task payload memory can only be tested in task shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::PAYLOAD || type == glu::SHADERTYPE_TASK);
    }

    glu::ShaderType getType(void) const
    {
        return m_type;
    }
    AtomicMemoryType getMemoryType(void) const
    {
        return m_atomicMemoryType;
    }
    bool isSharedLike(void) const
    {
        return m_atomicMemoryType == AtomicMemoryType::SHARED || m_atomicMemoryType == AtomicMemoryType::PAYLOAD;
    }
    bool isMeshShadingStage(void) const
    {
        return (m_type == glu::SHADERTYPE_TASK || m_type == glu::SHADERTYPE_MESH);
    }

private:
    glu::ShaderType m_type;
    AtomicMemoryType m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
    Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef);

    VkBuffer getBuffer(void) const
    {
        return *m_buffer;
    }
    void *getHostPtr(void) const
    {
        return m_allocation->getHostPtr();
    }
    void flush(void);
    void invalidate(void);

private:
    const DeviceInterface &m_vkd;
    const VkDevice m_device;
    const VkQueue m_queue;
    const uint32_t m_queueIndex;
    const Unique<VkBuffer> m_buffer;
    const UniquePtr<Allocation> m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer(const DeviceInterface &vkd, VkDevice device, VkDeviceSize size,
                            VkBufferUsageFlags usageFlags)
{
    const VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                           DE_NULL,
                                           (VkBufferCreateFlags)0,
                                           size,
                                           usageFlags,
                                           VK_SHARING_MODE_EXCLUSIVE,
                                           0u,
                                           DE_NULL};
    return createBuffer(vkd, device, &createInfo);
}

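// Allocate host-visible memory for the buffer and bind it. When the buffer will be accessed through its device
// address (useRef), the allocation must additionally request the DeviceAddress memory requirement so the address
// can legally be queried and used in shaders.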
MovePtr<Allocation> allocateAndBindMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                                          VkBuffer buffer, bool useRef)
{
    const MemoryRequirement allocationType =
        (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
    MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

    VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

    return alloc;
}

Buffer::Buffer(Context &context, VkBufferUsageFlags usage, size_t size, bool useRef)
    : m_vkd(context.getDeviceInterface())
    , m_device(context.getDevice())
    , m_queue(context.getUniversalQueue())
    , m_queueIndex(context.getUniversalQueueFamilyIndex())
    , m_buffer(createBuffer(context.getDeviceInterface(), context.getDevice(), (VkDeviceSize)size, usage))
    , m_allocation(allocateAndBindMemory(context.getDeviceInterface(), context.getDevice(),
                                         context.getDefaultAllocator(), *m_buffer, useRef))
{
}

void Buffer::flush(void)
{
    flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

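// Make device writes to the buffer visible to the host: submit a memory barrier moving the buffer contents to the
// host domain, then invalidate the mapped memory range before reading it through the host pointer.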
void Buffer::invalidate(void)
{
    const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
    const auto cmdBufferPtr =
        vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer     = cmdBufferPtr.get();
    const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
                                                           m_buffer.get(), 0ull, VK_WHOLE_SIZE);

    beginCommandBuffer(m_vkd, cmdBuffer);
    m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr,
                             1u, &bufferBarrier, 0u, nullptr);
    endCommandBuffer(m_vkd, cmdBuffer);
    submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

    invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
    ATOMIC_OP_EXCHANGE = 0,
    ATOMIC_OP_COMP_SWAP,
    ATOMIC_OP_ADD,
    ATOMIC_OP_MIN,
    ATOMIC_OP_MAX,
    ATOMIC_OP_AND,
    ATOMIC_OP_OR,
    ATOMIC_OP_XOR,

    ATOMIC_OP_LAST
};

std::string atomicOp2Str(AtomicOperation op)
{
    static const char *const s_names[] = {"atomicExchange", "atomicCompSwap", "atomicAdd", "atomicMin",
                                          "atomicMax",      "atomicAnd",      "atomicOr",  "atomicXor"};
    return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
    NUM_ELEMENTS = 32
};

enum DataType
{
    DATA_TYPE_FLOAT16 = 0,
    DATA_TYPE_INT32,
    DATA_TYPE_UINT32,
    DATA_TYPE_FLOAT32,
    DATA_TYPE_INT64,
    DATA_TYPE_UINT64,
    DATA_TYPE_FLOAT64,

    DATA_TYPE_LAST
};

std::string dataType2Str(DataType type)
{
    static const char *const s_names[] = {
        "float16_t", "int", "uint", "float", "int64_t", "uint64_t", "double",
    };
    return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
    virtual void setBuffer(void *ptr) = 0;

    virtual size_t bufferSize() = 0;

    virtual void fillWithTestData(de::Random &rnd) = 0;

    virtual void checkResults(tcu::ResultCollector &resultCollector) = 0;

    virtual ~BufferInterface()
    {
    }
};

template <typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:
    TestBuffer(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
    {
    }

    template <typename T>
    struct BufferData
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
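        // For example, with NUM_ELEMENTS == 32, inout[3] is the target of the two operations
        // that use input[3] and input[19] as their operands.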
        T inout[NUM_ELEMENTS / 2];
        T input[NUM_ELEMENTS];
        T compare[NUM_ELEMENTS];
        T output[NUM_ELEMENTS];
        T invocationHitCount[NUM_ELEMENTS];
        int32_t index;
    };

    virtual void setBuffer(void *ptr)
    {
        m_ptr = static_cast<BufferData<dataTypeT> *>(ptr);
    }

    virtual size_t bufferSize()
    {
        return sizeof(BufferData<dataTypeT>);
    }

    virtual void fillWithTestData(de::Random &rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
            // The first half of compare elements match with every even index.
            // The second half matches with odd indices. This causes the
            // overlapping operations to only select one.
            m_ptr->compare[i]                    = m_ptr->inout[i] + (i % 2);
            m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i]              = static_cast<dataTypeT>(rnd.getUint64());
            m_ptr->output[i]             = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }
        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults(tcu::ResultCollector &resultCollector)
    {
        checkOperation(m_original, *m_ptr, resultCollector);
    }

    template <typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected(T inout, T output0, T output1) : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare(T inout, T output0, T output1)
        {
            return (deMemCmp((const void *)&m_inout, (const void *)&inout, sizeof(inout)) == 0 &&
                    deMemCmp((const void *)&m_output[0], (const void *)&output0, sizeof(output0)) == 0 &&
                    deMemCmp((const void *)&m_output[1], (const void *)&output1, sizeof(output1)) == 0);
        }
    };

    void checkOperation(const BufferData<dataTypeT> &original, const BufferData<dataTypeT> &result,
                        tcu::ResultCollector &resultCollector);

    const AtomicOperation m_atomicOp;

    BufferData<dataTypeT> *m_ptr;
    BufferData<dataTypeT> m_original;
};

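// Equality check used for floating point results: two NaNs compare equal regardless of payload, and non-NaN
// values are compared with a small absolute tolerance instead of exact bit equality.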
template <typename T>
bool nanSafeSloppyEquals(T x, T y)
{
    if (deIsIEEENaN(x) && deIsIEEENaN(y))
        return true;

    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return false;

    return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}

template <typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:
    TestBufferFloatingPoint(AtomicOperation atomicOp) : m_atomicOp(atomicOp)
    {
    }

    template <typename T>
    struct BufferDataFloatingPoint
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T inout[NUM_ELEMENTS / 2];
        T input[NUM_ELEMENTS];
        T compare[NUM_ELEMENTS];
        T output[NUM_ELEMENTS];
        int32_t invocationHitCount[NUM_ELEMENTS];
        int32_t index;
    };

    virtual void setBuffer(void *ptr)
    {
        m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT> *>(ptr);
    }

    virtual size_t bufferSize()
    {
        return sizeof(BufferDataFloatingPoint<dataTypeT>);
    }

    virtual void fillWithTestData(de::Random &rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
            // These aren't used by any of the float tests
            m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i]              = deToFloatType<dataTypeT>(rnd.getFloat());
            m_ptr->output[i]             = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }

        // Add special cases for NaN and +/-0 after the random fill so they are not overwritten by it.
        // 0: min(sNaN, x)
        m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
        // 1: min(x, sNaN)
        m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
        // 2: min(qNaN, x)
        m_ptr->inout[2] = deQuietNaN<dataTypeT>();
        // 3: min(x, qNaN)
        m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
        // 4: min(NaN, NaN)
        m_ptr->inout[4]         = deSignalingNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
        m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
        // 5: min(+0, -0)
        m_ptr->inout[5]         = deToFloatType<dataTypeT>(-0.0);
        m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
        m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);

        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults(tcu::ResultCollector &resultCollector)
    {
        checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
    }

    template <typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected(T inout, T output0, T output1) : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare(T inout, T output0, T output1)
        {
            return nanSafeSloppyEquals(m_inout, inout) && nanSafeSloppyEquals(m_output[0], output0) &&
                   nanSafeSloppyEquals(m_output[1], output1);
        }
    };

    void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT> &original,
                                     const BufferDataFloatingPoint<dataTypeT> &result,
                                     tcu::ResultCollector &resultCollector);

    const AtomicOperation m_atomicOp;

    BufferDataFloatingPoint<dataTypeT> *m_ptr;
    BufferDataFloatingPoint<dataTypeT> m_original;
};

static BufferInterface *createTestBuffer(DataType type, AtomicOperation atomicOp)
{
    switch (type)
    {
    case DATA_TYPE_FLOAT16:
        return new TestBufferFloatingPoint<deFloat16>(atomicOp);
    case DATA_TYPE_INT32:
        return new TestBuffer<int32_t>(atomicOp);
    case DATA_TYPE_UINT32:
        return new TestBuffer<uint32_t>(atomicOp);
    case DATA_TYPE_FLOAT32:
        return new TestBufferFloatingPoint<float>(atomicOp);
    case DATA_TYPE_INT64:
        return new TestBuffer<int64_t>(atomicOp);
    case DATA_TYPE_UINT64:
        return new TestBuffer<uint64_t>(atomicOp);
    case DATA_TYPE_FLOAT64:
        return new TestBufferFloatingPoint<double>(atomicOp);
    default:
        DE_ASSERT(false);
        return DE_NULL;
    }
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template <typename T>
void TestBuffer<T>::checkOperation(const BufferData<T> &original, const BufferData<T> &result,
                                   tcu::ResultCollector &resultCollector)
{
    // originalInout = original inout
    // input0 = input at index i
    // input1 = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before
    // the operation, and this value is stored as output. Two operations
    // are executed for each InOut value (using input0 and input1).
    //
    // Since there is an overlap of two operations per each
    // InOut element, the outcome of the resulting InOut and
    // the outputs of the operations have two result candidates
    // depending on the execution order. Verification passes
    // if the results match one of these options.
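    //
    // For example, with atomicAdd and originalInout = 1, input0 = 2, input1 = 3, the final InOut
    // is always 6, but the outputs are either (1, 3) or (4, 1) depending on which operation ran first.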

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T *>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T *>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T>> exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
            exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
        }
        break;

        case ATOMIC_OP_AND:
        {
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
            exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
        }
        break;

        case ATOMIC_OP_OR:
        {
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
            exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
        }
        break;

        case ATOMIC_OP_XOR:
        {
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
            exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout,
                                      de::min(originalInout, input0)));
            exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1),
                                      originalInout));
        }
        break;

        case ATOMIC_OP_MAX:
        {
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout,
                                      de::max(originalInout, input0)));
            exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1),
                                      originalInout));
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        case ATOMIC_OP_COMP_SWAP:
        {
            if (elementNdx % 2 == 0)
            {
                exp.push_back(Expected<T>(input0, originalInout, input0));
                exp.push_back(Expected<T>(input0, originalInout, originalInout));
            }
            else
            {
                exp.push_back(Expected<T>(input1, input1, originalInout));
                exp.push_back(Expected<T>(input1, originalInout, originalInout));
            }
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[0].m_output[0])
                         << ", Output1 = " << tcu::toHex(exp[0].m_output[1])
                         << ", or InOut = " << tcu::toHex(exp[1].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[1].m_output[0])
                         << ", Output1 = " << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
                         << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = " << tcu::toHex(resOutput1)
                         << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
                         << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

template <typename T>
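// Collects the set of results a floating point min/max is allowed to produce when one operand is a NaN or when
// comparing +0 and -0: a signaling NaN operand may legally yield a quiet NaN, a signaling NaN, or the other
// (non-NaN) operand, and min/max of +0 and -0 may return either zero. If no special case applies, the vector is
// left empty and the caller computes the single regular result.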
void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
{
    if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
    }
    else if (deIsSignalingNaN(x))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(y))
            values.push_back(y);
    }
    else if (deIsSignalingNaN(y))
    {
        values.push_back(deQuietNaN<T>());
        values.push_back(deSignalingNaN<T>());
        if (!deIsIEEENaN(x))
            values.push_back(x);
    }
    else if (deIsIEEENaN(x) && deIsIEEENaN(y))
    {
        // Both quiet NaNs
        values.push_back(deQuietNaN<T>());
    }
    else if (deIsIEEENaN(x))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(y);
    }
    else if (deIsIEEENaN(y))
    {
        // One quiet NaN and one non-NaN.
        values.push_back(x);
    }
    else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
    {
        values.push_back(deToFloatType<T>(0.0));
        values.push_back(deToFloatType<T>(-0.0));
    }
}

template <typename T>
T floatAdd(T x, T y)
{
    if (deIsIEEENaN(x) || deIsIEEENaN(y))
        return deQuietNaN<T>();
    return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}

template <typename T>
vector<T> floatMinValues(T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
    }
    return values;
}

template <typename T>
vector<T> floatMaxValues(T x, T y)
{
    vector<T> values;
    handleExceptionalFloatMinMaxValues(values, x, y);
    if (values.empty())
    {
        values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
    }
    return values;
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template <typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T> &original,
                                                             const BufferDataFloatingPoint<T> &result,
                                                             tcu::ResultCollector &resultCollector)
{
    // originalInout = original inout
    // input0 = input at index i
    // input1 = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before
    // the operation, and this value is stored as output. Two operations
    // are executed for each InOut value (using input0 and input1).
    //
    // Since there is an overlap of two operations per each
    // InOut element, the outcome of the resulting InOut and
    // the outputs of the operations have two result candidates
    // depending on the execution order. Verification passes
    // if the results match one of these options.
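    //
    // For floating point min/max, NaN and signed zero handling can make more than one value legal at
    // each step, so all combinations of allowed intermediate results are enumerated as candidates below.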

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T *>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T *>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T *>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T>> exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
        {
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout,
                                      floatAdd(originalInout, input0)));
            exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input1), input0),
                                      floatAdd(originalInout, input1), originalInout));
        }
        break;

        case ATOMIC_OP_MIN:
        {
            // The case where input0 is combined first
            vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
            for (T x : minOriginalAndInput0)
            {
                vector<T> minAll = floatMinValues(x, input1);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
            for (T x : minOriginalAndInput1)
            {
                vector<T> minAll = floatMinValues(x, input0);
                for (T y : minAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_MAX:
        {
            // The case where input0 is combined first
            vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
            for (T x : maxOriginalAndInput0)
            {
                vector<T> maxAll = floatMaxValues(x, input1);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, originalInout, x));
                }
            }

            // The case where input1 is combined first
            vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
            for (T x : maxOriginalAndInput1)
            {
                vector<T> maxAll = floatMaxValues(x, input0);
                for (T y : maxAll)
                {
                    exp.push_back(Expected<T>(y, x, originalInout));
                }
            }
        }
        break;

        case ATOMIC_OP_EXCHANGE:
        {
            exp.push_back(Expected<T>(input1, originalInout, input0));
            exp.push_back(Expected<T>(input0, input1, originalInout));
        }
        break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        bool hasMatch = false;
        for (Expected<T> e : exp)
        {
            if (e.compare(resIo, resOutput0, resOutput1))
            {
                hasMatch = true;
                break;
            }
        }
        if (!hasMatch)
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the outcomes:";

            bool first = true;
            for (Expected<T> e : exp)
            {
                if (!first)
                    errorMessage << ", or";
                first = false;

                errorMessage << " InOut = " << e.m_inout << ", Output0 = " << e.m_output[0]
                             << ", Output1 = " << e.m_output[1];
            }

            errorMessage << ". Got: InOut = " << resIo << ", Output0 = " << resOutput0 << ", Output1 = " << resOutput1
                         << ". Using Input0 = " << original.input[elementNdx]
                         << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
    AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec, AtomicShaderType shaderType,
                                DataType dataType, AtomicOperation atomicOp);

    virtual tcu::TestStatus iterate(void);

private:
    const ShaderSpec &m_shaderSpec;
    AtomicShaderType m_shaderType;
    const DataType m_dataType;
    AtomicOperation m_atomicOp;
};

AtomicOperationCaseInstance::AtomicOperationCaseInstance(Context &context, const ShaderSpec &shaderSpec,
                                                         AtomicShaderType shaderType, DataType dataType,
                                                         AtomicOperation atomicOp)
    : TestInstance(context)
    , m_shaderSpec(shaderSpec)
    , m_shaderType(shaderType)
    , m_dataType(dataType)
    , m_atomicOp(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
    de::UniquePtr<BufferInterface> testBuffer(createTestBuffer(m_dataType, m_atomicOp));
    tcu::TestLog &log          = m_context.getTestContext().getLog();
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device      = m_context.getDevice();
    de::Random rnd(0x62a15e34);
    const bool useRef               = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
    const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const VkBufferUsageFlags usageFlags =
        (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
         (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

    // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
    // a uniform buffer. If not, it will be passed directly as a descriptor.
    Buffer buffer(m_context, usageFlags, testBuffer->bufferSize(), useRef);
    std::unique_ptr<Buffer> auxBuffer;

    if (useRef)
    {
        // Pass the main buffer address inside a uniform buffer.
        const VkBufferDeviceAddressInfo addressInfo = {
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            nullptr,                                      // const void* pNext;
            buffer.getBuffer(),                           // VkBuffer buffer;
        };
        const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

        auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
        deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
        auxBuffer->flush();
    }

    testBuffer->setBuffer(buffer.getHostPtr());
    testBuffer->fillWithTestData(rnd);

    buffer.flush();

    Move<VkDescriptorSetLayout> extraResourcesLayout;
    Move<VkDescriptorPool> extraResourcesSetPool;
    Move<VkDescriptorSet> extraResourcesSet;

    const VkDescriptorSetLayoutBinding bindings[] = {{0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL}};

    const VkDescriptorSetLayoutCreateInfo layoutInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, DE_NULL,
                                                        (VkDescriptorSetLayoutCreateFlags)0u,
                                                        DE_LENGTH_OF_ARRAY(bindings), bindings};

    extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

    const VkDescriptorPoolSize poolSizes[] = {{descType, 1u}};

    const VkDescriptorPoolCreateInfo poolInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        DE_NULL,
        (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        1u, // maxSets
        DE_LENGTH_OF_ARRAY(poolSizes),
        poolSizes};

    extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

    const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
                                                   *extraResourcesSetPool, 1u, &extraResourcesLayout.get()};

    extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

    VkDescriptorBufferInfo bufferInfo;
    bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
    bufferInfo.offset = 0u;
    bufferInfo.range  = VK_WHOLE_SIZE;

    const VkWriteDescriptorSet descriptorWrite = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                  DE_NULL,
                                                  *extraResourcesSet,
                                                  0u, // dstBinding
                                                  0u, // dstArrayElement
                                                  1u,
                                                  descType,
                                                  (const VkDescriptorImageInfo *)DE_NULL,
                                                  &bufferInfo,
                                                  (const VkBufferView *)DE_NULL};

    vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

    // Storage for output varying data.
    std::vector<uint32_t> outputs(NUM_ELEMENTS);
    std::vector<void *> outputPtr(NUM_ELEMENTS);

    for (size_t i = 0; i < NUM_ELEMENTS; i++)
    {
        outputs[i]   = 0xcdcdcdcd;
        outputPtr[i] = &outputs[i];
    }

    const int numWorkGroups = (m_shaderType.isSharedLike() ? 1 : static_cast<int>(NUM_ELEMENTS));
    UniquePtr<ShaderExecutor> executor(
        createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

    executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
    buffer.invalidate();

    tcu::ResultCollector resultCollector(log);

    // Check the results of the atomic operation
    testBuffer->checkResults(resultCollector);

    return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
    AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType type, DataType dataType,
                        AtomicOperation atomicOp);
    virtual ~AtomicOperationCase(void);

    virtual TestInstance *createInstance(Context &ctx) const;
    virtual void checkSupport(Context &ctx) const;
    virtual void initPrograms(vk::SourceCollections &programCollection) const
    {
        const bool useSpv14   = m_shaderType.isMeshShadingStage();
        const auto spvVersion = (useSpv14 ? vk::SPIRV_VERSION_1_4 : vk::SPIRV_VERSION_1_0);
        const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spvVersion, 0u, useSpv14);
        ShaderSpec sourcesSpec(m_shaderSpec);

        sourcesSpec.buildOptions = buildOptions;
        generateSources(m_shaderType.getType(), sourcesSpec, programCollection);
    }

private:
    void createShaderSpec();
    ShaderSpec m_shaderSpec;
    const AtomicShaderType m_shaderType;
    const DataType m_dataType;
    const AtomicOperation m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase(tcu::TestContext &testCtx, const char *name, AtomicShaderType shaderType,
                                         DataType dataType, AtomicOperation atomicOp)
    : TestCase(testCtx, name)
    , m_shaderType(shaderType)
    , m_dataType(dataType)
    , m_atomicOp(atomicOp)
{
    createShaderSpec();
    init();
}

AtomicOperationCase::~AtomicOperationCase(void)
{
}

TestInstance *AtomicOperationCase::createInstance(Context &ctx) const
{
    return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}
void AtomicOperationCase::checkSupport(Context &ctx) const
{
    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

        const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
        const bool isSharedMemory      = m_shaderType.isSharedLike();

        if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError,
                      "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
        }
        if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError,
                      "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT16)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
                }
            }
        }
#endif // CTS_USES_VULKANSC
    }

    if (m_dataType == DATA_TYPE_FLOAT32)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT64)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
        {
            ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
                {
                    TCU_THROW(
                        NotSupportedError,
                        "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
                }
            }
#endif // CTS_USES_VULKANSC
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.isSharedLike())
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError,
                              "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
    {
        ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
    }

    checkSupportShader(ctx, m_shaderType.getType());
}

void AtomicOperationCase::createShaderSpec(void)
{
    const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
    const bool isSharedLike           = m_shaderType.isSharedLike();

    // Global declarations.
    std::ostringstream shaderTemplateGlobalStream;

    // Structure in use for atomic operations.
    shaderTemplateGlobalStream << "${EXTENSIONS}\n"
                               << "\n"
                               << "struct AtomicStruct\n"
                               << "{\n"
                               << "    ${DATATYPE} inoutValues[${N}/2];\n"
                               << "    ${DATATYPE} inputValues[${N}];\n"
                               << "    ${DATATYPE} compareValues[${N}];\n"
                               << "    ${DATATYPE} outputValues[${N}];\n"
                               << "    int invocationHitCount[${N}];\n"
                               << "    int index;\n"
                               << "};\n"
                               << "\n";

    // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
    // as "buf.data", which is the name used in the atomic operation statements.
    //
    // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
    // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
    // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
    //
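    // All variants therefore end up generating atomic statements of the form
    //     buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ...);
    // regardless of where the data actually lives.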
    if (memoryType != AtomicMemoryType::REFERENCE)
    {
        shaderTemplateGlobalStream << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
                                   << "    AtomicStruct data;\n"
                                   << "} ${RESULT_BUFFER_NAME};\n"
                                   << "\n";

        // When using global shared memory in the compute, task or mesh variants, invocations will use a shared global structure
        // instead of a descriptor set as the sources and results of each tested operation.
        if (memoryType == AtomicMemoryType::SHARED)
        {
            shaderTemplateGlobalStream << "shared struct { AtomicStruct data; } buf;\n"
                                       << "\n";
        }
        else if (memoryType == AtomicMemoryType::PAYLOAD)
        {
            shaderTemplateGlobalStream << "struct TaskData { AtomicStruct data; };\n"
                                       << "taskPayloadSharedEXT TaskData buf;\n";
        }
    }
    else
    {
        shaderTemplateGlobalStream << "layout (buffer_reference) buffer AtomicBuffer {\n"
                                   << "    AtomicStruct data;\n"
                                   << "};\n"
                                   << "\n"
                                   << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
                                   << "    AtomicBuffer buf;\n"
                                   << "};\n"
                                   << "\n";
    }

    const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
    const tcu::StringTemplate shaderTemplateGlobal(shaderTemplateGlobalString);

    // Shader body for the non-vertex case.
    std::ostringstream nonVertexShaderTemplateStream;

    if (isSharedLike)
    {
        // Invocation zero will initialize the shared structure from the descriptor set.
        nonVertexShaderTemplateStream << "if (gl_LocalInvocationIndex == 0u)\n"
                                      << "{\n"
                                      << "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
                                      << "}\n"
                                      << "barrier();\n";
    }

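    // In the fragment case, helper invocations are excluded: only non-helper invocations allocate an index and
    // perform the atomic operation, keeping the number of executed operations deterministic.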
    if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
    {
        nonVertexShaderTemplateStream << "if (!gl_HelperInvocation) {\n"
                                      << "    int idx = atomicAdd(buf.data.index, 1);\n"
                                      << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
                                         "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
                                      << "}\n";
    }
    else
    {
        nonVertexShaderTemplateStream << "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
                                      << "{\n"
                                      << "    int idx = atomicAdd(buf.data.index, 1);\n"
                                      << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % "
                                         "(${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
                                      << "}\n";
    }
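
    // Note: the atomicAdd on invocationHitCount[0] caps participation at ${N} invocations, since stages such
    // as geometry or tessellation may be invoked more times than there are elements to test.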

    if (isSharedLike)
    {
        // Invocation zero will copy results back to the descriptor set.
        nonVertexShaderTemplateStream << "barrier();\n"
                                      << "if (gl_LocalInvocationIndex == 0u)\n"
                                      << "{\n"
                                      << "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
                                      << "}\n";
    }

    const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
    const tcu::StringTemplate nonVertexShaderTemplateSrc(nonVertexShaderTemplateStreamStr);

    // Shader body for the vertex case.
    const tcu::StringTemplate vertexShaderTemplateSrc(
        "int idx = gl_VertexIndex;\n"
        "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
        "{\n"
        "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], "
        "${COMPARE_ARG}buf.data.inputValues[idx]);\n"
        "}\n");

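    // Note: an implementation may run the vertex shader more than once for the same vertex index, so the
    // invocationHitCount check above ensures each element is operated on exactly once.
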
    // Extensions.
    std::ostringstream extensions;

    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
                   << "#extension GL_EXT_shader_atomic_int64 : enable\n";
    }
    else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) ||
             (m_dataType == DATA_TYPE_FLOAT64))
    {
        extensions << "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
                   << "#extension GL_EXT_shader_atomic_float : enable\n"
                   << "#extension GL_EXT_shader_atomic_float2 : enable\n"
                   << "#extension GL_KHR_memory_scope_semantics : enable\n";
    }

    if (memoryType == AtomicMemoryType::REFERENCE)
    {
        extensions << "#extension GL_EXT_buffer_reference : require\n";
    }

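    // These GLSL extensions correspond to Vulkan features (e.g. VK_KHR_shader_atomic_int64 and
    // VK_EXT_shader_atomic_float/float2); device support for them is expected to be verified separately in
    // the case's support checks.
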
    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"]  = extensions.str();
    specializations["DATATYPE"]    = dataType2Str(m_dataType);
    specializations["ATOMICOP"]    = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"]      = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"]           = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = (isSharedLike ? "result" : "buf");

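    // For example, with ATOMIC_OP_COMP_SWAP the core statement specializes to roughly (assuming
    // atomicOp2Str() maps it to "atomicCompSwap"):
    //
    //     buf.data.outputValues[idx] = atomicCompSwap(buf.data.inoutValues[idx % (N/2)],
    //                                                 buf.data.compareValues[idx], buf.data.inputValues[idx]);
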
    // Shader spec.
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion        = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source =
        ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX) ? vertexShaderTemplateSrc.specialize(specializations) :
                                                              nonVertexShaderTemplateSrc.specialize(specializations));

    if (isSharedLike)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}

void addAtomicOperationTests(tcu::TestCaseGroup *atomicOperationTestsGroup)
{
    tcu::TestContext &testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char *name;
    } shaderTypes[] = {
        {glu::SHADERTYPE_VERTEX, "vertex"},
        {glu::SHADERTYPE_FRAGMENT, "fragment"},
        {glu::SHADERTYPE_GEOMETRY, "geometry"},
        {glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl"},
        {glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval"},
        {glu::SHADERTYPE_COMPUTE, "compute"},
        {glu::SHADERTYPE_TASK, "task"},
        {glu::SHADERTYPE_MESH, "mesh"},
    };

    static const struct
    {
        AtomicMemoryType type;
        const char *suffix;
    } kMemoryTypes[] = {
        {AtomicMemoryType::BUFFER, ""},
        {AtomicMemoryType::SHARED, "_shared"},
        {AtomicMemoryType::REFERENCE, "_reference"},
        {AtomicMemoryType::PAYLOAD, "_payload"},
    };

    static const struct
    {
        DataType dataType;
        const char *name;
    } dataSign[] = {
#ifndef CTS_USES_VULKANSC
        // Tests using 16-bit float data
        {DATA_TYPE_FLOAT16, "float16"},
#endif // CTS_USES_VULKANSC
        // Tests using signed data (int)
        {DATA_TYPE_INT32, "signed"},
        // Tests using unsigned data (uint)
        {DATA_TYPE_UINT32, "unsigned"},
        // Tests using 32-bit float data
        {DATA_TYPE_FLOAT32, "float32"},
        // Tests using 64-bit signed data (int64)
        {DATA_TYPE_INT64, "signed64bit"},
        // Tests using 64-bit unsigned data (uint64)
        {DATA_TYPE_UINT64, "unsigned64bit"},
        // Tests using 64-bit float data
        {DATA_TYPE_FLOAT64, "float64"}};

    static const struct
    {
        AtomicOperation value;
        const char *name;
    } atomicOp[] = {{ATOMIC_OP_EXCHANGE, "exchange"},
                    {ATOMIC_OP_COMP_SWAP, "comp_swap"},
                    {ATOMIC_OP_ADD, "add"},
                    {ATOMIC_OP_MIN, "min"},
                    {ATOMIC_OP_MAX, "max"},
                    {ATOMIC_OP_AND, "and"},
                    {ATOMIC_OP_OR, "or"},
                    {ATOMIC_OP_XOR, "xor"}};

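    // The nested loops below register one case per valid combination of operation, data type, shader stage
    // and memory type, producing test names such as "add_signed_compute" or
    // "exchange_float32_fragment_reference".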
    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Floating-point data only supports ADD and EXCHANGE (plus MIN and MAX outside Vulkan SC).
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 ||
                    dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
#ifndef CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_MIN && atomicOp[opNdx].value != ATOMIC_OP_MAX &&
#endif // CTS_USES_VULKANSC
                        atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute, task and mesh shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_MESH)
                        continue;

                    // Payload memory is only available for atomics in task shaders (in mesh shaders it is read-only).
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::PAYLOAD &&
                        shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_TASK)
                        continue;

                    const std::string name =
                        std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" +
                        std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(
                        testCtx, name.c_str(),
                        AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type),
                        dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // namespace

tcu::TestCaseGroup *createAtomicOperationTests(tcu::TestContext &testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", addAtomicOperationTests);
}

} // namespace shaderexecutor
} // namespace vkt