xref: /aosp_15_r20/external/deqp/modules/gles31/functional/es31fShaderAtomicOpTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader atomic operation tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fShaderAtomicOpTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluShaderUtil.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluObjectWrapper.hpp"
29 #include "gluProgramInterfaceQuery.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuTestLog.hpp"
32 #include "tcuVectorUtil.hpp"
33 #include "tcuFormatUtil.hpp"
34 #include "deStringUtil.hpp"
35 #include "deRandom.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 
39 #include <algorithm>
40 #include <set>
41 
42 namespace deqp
43 {
44 namespace gles31
45 {
46 namespace Functional
47 {
48 
49 using std::set;
50 using std::string;
51 using std::vector;
52 using tcu::TestLog;
53 using tcu::UVec3;
54 using namespace glu;
55 
56 template <typename T, int Size>
product(const tcu::Vector<T,Size> & v)57 static inline T product(const tcu::Vector<T, Size> &v)
58 {
59     T res = v[0];
60     for (int ndx = 1; ndx < Size; ndx++)
61         res *= v[ndx];
62     return res;
63 }
64 
65 class ShaderAtomicOpCase : public TestCase
66 {
67 public:
68     ShaderAtomicOpCase(Context &context, const char *name, const char *funcName, AtomicOperandType operandType,
69                        DataType type, Precision precision, const UVec3 &workGroupSize);
70     ~ShaderAtomicOpCase(void);
71 
72     void init(void);
73     void deinit(void);
74     IterateResult iterate(void);
75 
76 protected:
77     virtual void getInputs(int numValues, int stride, void *inputs) const = 0;
78     virtual bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
79                         int groupStride, const void *groupOutputs) const  = 0;
80 
81     const string m_funcName;
82     const AtomicOperandType m_operandType;
83     const DataType m_type;
84     const Precision m_precision;
85 
86     const UVec3 m_workGroupSize;
87     const UVec3 m_numWorkGroups;
88 
89     uint32_t m_initialValue;
90 
91 private:
92     ShaderAtomicOpCase(const ShaderAtomicOpCase &other);
93     ShaderAtomicOpCase &operator=(const ShaderAtomicOpCase &other);
94 
95     ShaderProgram *m_program;
96 };
97 
ShaderAtomicOpCase(Context & context,const char * name,const char * funcName,AtomicOperandType operandType,DataType type,Precision precision,const UVec3 & workGroupSize)98 ShaderAtomicOpCase::ShaderAtomicOpCase(Context &context, const char *name, const char *funcName,
99                                        AtomicOperandType operandType, DataType type, Precision precision,
100                                        const UVec3 &workGroupSize)
101     : TestCase(context, name, funcName)
102     , m_funcName(funcName)
103     , m_operandType(operandType)
104     , m_type(type)
105     , m_precision(precision)
106     , m_workGroupSize(workGroupSize)
107     , m_numWorkGroups(4, 4, 4)
108     , m_initialValue(0)
109     , m_program(DE_NULL)
110 {
111 }
112 
~ShaderAtomicOpCase(void)113 ShaderAtomicOpCase::~ShaderAtomicOpCase(void)
114 {
115     ShaderAtomicOpCase::deinit();
116 }
117 
init(void)118 void ShaderAtomicOpCase::init(void)
119 {
120     const bool isSSBO    = m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
121     const char *precName = getPrecisionName(m_precision);
122     const char *typeName = getDataTypeName(m_type);
123 
124     const DataType outType  = isSSBO ? m_type : glu::TYPE_UINT;
125     const char *outTypeName = getDataTypeName(outType);
126 
127     const uint32_t numValues = product(m_workGroupSize) * product(m_numWorkGroups);
128     std::ostringstream src;
129 
130     src << glu::getGLSLVersionDeclaration(getContextTypeGLSLVersion(m_context.getRenderContext().getType())) << "\n"
131         << "layout(local_size_x = " << m_workGroupSize.x() << ", local_size_y = " << m_workGroupSize.y()
132         << ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
133         << "layout(binding = 0) buffer InOut\n"
134         << "{\n"
135         << "    " << precName << " " << typeName << " inputValues[" << numValues << "];\n"
136         << "    " << precName << " " << outTypeName << " outputValues[" << numValues << "];\n"
137         << "    " << (isSSBO ? "coherent " : "") << precName << " " << outTypeName << " groupValues["
138         << product(m_numWorkGroups) << "];\n"
139         << "} sb_inout;\n";
140 
141     if (!isSSBO)
142         src << "shared " << precName << " " << typeName << " s_var;\n";
143 
144     src << "\n"
145         << "void main (void)\n"
146         << "{\n"
147         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
148         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
149            "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
150         << "    uint globalOffs = localSize*globalNdx;\n"
151         << "    uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
152         << "\n";
153 
154     if (isSSBO)
155     {
156         DE_ASSERT(outType == m_type);
157         src << "    sb_inout.outputValues[offset] = " << m_funcName
158             << "(sb_inout.groupValues[globalNdx], sb_inout.inputValues[offset]);\n";
159     }
160     else
161     {
162         const string castBeg      = outType != m_type ? (string(outTypeName) + "(") : string("");
163         const char *const castEnd = outType != m_type ? ")" : "";
164 
165         src << "    if (gl_LocalInvocationIndex == 0u)\n"
166             << "        s_var = " << typeName << "(" << tcu::toHex(m_initialValue) << "u);\n"
167             << "    barrier();\n"
168             << "    " << precName << " " << typeName << " res = " << m_funcName
169             << "(s_var, sb_inout.inputValues[offset]);\n"
170             << "    sb_inout.outputValues[offset] = " << castBeg << "res" << castEnd << ";\n"
171             << "    barrier();\n"
172             << "    if (gl_LocalInvocationIndex == 0u)\n"
173             << "        sb_inout.groupValues[globalNdx] = " << castBeg << "s_var" << castEnd << ";\n";
174     }
175 
176     src << "}\n";
177 
178     DE_ASSERT(!m_program);
179     m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
180 
181     m_testCtx.getLog() << *m_program;
182 
183     if (!m_program->isOk())
184     {
185         delete m_program;
186         m_program = DE_NULL;
187         throw tcu::TestError("Compile failed");
188     }
189 }
190 
deinit(void)191 void ShaderAtomicOpCase::deinit(void)
192 {
193     delete m_program;
194     m_program = DE_NULL;
195 }
196 
iterate(void)197 ShaderAtomicOpCase::IterateResult ShaderAtomicOpCase::iterate(void)
198 {
199     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
200     const uint32_t program   = m_program->getProgram();
201     const Buffer inoutBuffer(m_context.getRenderContext());
202     const uint32_t blockNdx            = gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
203     const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
204     const uint32_t inVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.inputValues[0]");
205     const InterfaceVariableInfo inVarInfo = getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, inVarNdx);
206     const uint32_t outVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
207     const InterfaceVariableInfo outVarInfo =
208         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
209     const uint32_t groupVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
210     const InterfaceVariableInfo groupVarInfo =
211         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
212     const uint32_t numValues = product(m_workGroupSize) * product(m_numWorkGroups);
213 
214     TCU_CHECK(inVarInfo.arraySize == numValues && outVarInfo.arraySize == numValues &&
215               groupVarInfo.arraySize == product(m_numWorkGroups));
216 
217     gl.useProgram(program);
218 
219     // Setup buffer.
220     {
221         vector<uint8_t> bufData(blockInfo.dataSize);
222         std::fill(bufData.begin(), bufData.end(), 0);
223 
224         getInputs((int)numValues, (int)inVarInfo.arrayStride, &bufData[0] + inVarInfo.offset);
225 
226         if (m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE)
227         {
228             for (uint32_t valNdx = 0; valNdx < product(m_numWorkGroups); valNdx++)
229                 *(uint32_t *)(&bufData[0] + groupVarInfo.offset + groupVarInfo.arrayStride * valNdx) = m_initialValue;
230         }
231 
232         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
233         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
234         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
235         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
236     }
237 
238     gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
239 
240     // Read back and compare
241     {
242         const void *resPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
243         bool isOk          = true;
244 
245         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
246         TCU_CHECK(resPtr);
247 
248         isOk = verify((int)numValues, (int)inVarInfo.arrayStride, (const uint8_t *)resPtr + inVarInfo.offset,
249                       (int)outVarInfo.arrayStride, (const uint8_t *)resPtr + outVarInfo.offset,
250                       (int)groupVarInfo.arrayStride, (const uint8_t *)resPtr + groupVarInfo.offset);
251 
252         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
253         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
254 
255         m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS : QP_TEST_RESULT_FAIL, isOk ? "Pass" : "Comparison failed");
256     }
257 
258     return STOP;
259 }
260 
261 class ShaderAtomicAddCase : public ShaderAtomicOpCase
262 {
263 public:
ShaderAtomicAddCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)264     ShaderAtomicAddCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
265                         Precision precision)
266         : ShaderAtomicOpCase(context, name, "atomicAdd", operandType, type, precision, UVec3(3, 2, 1))
267     {
268         m_initialValue = 1;
269     }
270 
271 protected:
getInputs(int numValues,int stride,void * inputs) const272     void getInputs(int numValues, int stride, void *inputs) const
273     {
274         de::Random rnd(deStringHash(getName()));
275         const int maxVal = m_precision == PRECISION_LOWP ? 2 : 32;
276         const int minVal = 1;
277 
278         // \todo [2013-09-04 pyry] Negative values!
279 
280         for (int valNdx = 0; valNdx < numValues; valNdx++)
281             *(int *)((uint8_t *)inputs + stride * valNdx) = rnd.getInt(minVal, maxVal);
282     }
283 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const284     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
285                 int groupStride, const void *groupOutputs) const
286     {
287         const int workGroupSize = (int)product(m_workGroupSize);
288         const int numWorkGroups = numValues / workGroupSize;
289 
290         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
291         {
292             const int groupOffset = groupNdx * workGroupSize;
293             const int groupOutput = *(const int32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
294             set<int> outValues;
295             bool maxFound = false;
296             int valueSum  = (int)m_initialValue;
297 
298             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
299             {
300                 const int inputValue =
301                     *(const int32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
302                 valueSum += inputValue;
303             }
304 
305             if (groupOutput != valueSum)
306             {
307                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected sum "
308                                    << valueSum << ", got " << groupOutput << TestLog::EndMessage;
309                 return false;
310             }
311 
312             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
313             {
314                 const int inputValue =
315                     *(const int32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
316                 const int outputValue =
317                     *(const int32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
318 
319                 if (!de::inRange(outputValue, (int)m_initialValue, valueSum - inputValue))
320                 {
321                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
322                                        << localNdx << ": expected value in range [" << m_initialValue << ", "
323                                        << (valueSum - inputValue) << "], got " << outputValue << TestLog::EndMessage;
324                     return false;
325                 }
326 
327                 if (outValues.find(outputValue) != outValues.end())
328                 {
329                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
330                                        << localNdx << ": found duplicate value " << outputValue << TestLog::EndMessage;
331                     return false;
332                 }
333 
334                 outValues.insert(outputValue);
335                 if (outputValue == valueSum - inputValue)
336                     maxFound = true;
337             }
338 
339             if (!maxFound)
340             {
341                 m_testCtx.getLog() << TestLog::Message << "ERROR: could not find maximum expected value from group "
342                                    << groupNdx << TestLog::EndMessage;
343                 return false;
344             }
345 
346             if (outValues.find((int)m_initialValue) == outValues.end())
347             {
348                 m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx
349                                    << TestLog::EndMessage;
350                 return false;
351             }
352         }
353 
354         return true;
355     }
356 };
357 
getPrecisionNumIntegerBits(glu::Precision precision)358 static int getPrecisionNumIntegerBits(glu::Precision precision)
359 {
360     switch (precision)
361     {
362     case glu::PRECISION_HIGHP:
363         return 32;
364     case glu::PRECISION_MEDIUMP:
365         return 16;
366     case glu::PRECISION_LOWP:
367         return 9;
368     default:
369         DE_ASSERT(false);
370         return 0;
371     }
372 }
373 
//! Returns a mask with the lowest \p numPreciseBits bits set.
static uint32_t getPrecisionMask(int numPreciseBits)
{
    // \note Shifting by the full width of the operand is undefined, so the shift is done on a
    //       64-bit value and the result narrowed afterwards. This keeps a 32-bit request valid.
    const uint64_t lowestBit = 1u;
    const uint64_t lowBits   = (lowestBit << numPreciseBits) - 1;

    return static_cast<uint32_t>(lowBits);
}
379 
intEqualsAfterUintCast(int32_t value,uint32_t casted,glu::Precision precision)380 static bool intEqualsAfterUintCast(int32_t value, uint32_t casted, glu::Precision precision)
381 {
382     // Bit format of 'casted' = [ uint -> highp uint promotion bits (0) ] [ sign extend bits (s) ] [ value bits ]
383     //                                                                                             |--min len---|
384     //                                                                    |---------------signed length---------|
385     //                          |-------------------------------- highp uint length ----------------------------|
386 
387     const uint32_t reference   = (uint32_t)value;
388     const int signBitOn        = value < 0;
389     const int numPreciseBits   = getPrecisionNumIntegerBits(precision);
390     const uint32_t preciseMask = getPrecisionMask(numPreciseBits);
391 
392     // Lowest N bits must match, N = minimum precision
393     if ((reference & preciseMask) != (casted & preciseMask))
394         return false;
395 
396     // Other lowest bits must match the sign and the remaining (topmost) if any must be 0
397     for (int signedIntegerLength = numPreciseBits; signedIntegerLength <= 32; ++signedIntegerLength)
398     {
399         const uint32_t signBits = (signBitOn) ? (getPrecisionMask(signedIntegerLength)) : (0u);
400 
401         if ((signBits & ~preciseMask) == (casted & ~preciseMask))
402             return true;
403     }
404     return false;
405 }
406 
containsAfterUintCast(const std::set<int32_t> & haystack,uint32_t needle,glu::Precision precision)407 static bool containsAfterUintCast(const std::set<int32_t> &haystack, uint32_t needle, glu::Precision precision)
408 {
409     for (std::set<int32_t>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
410         if (intEqualsAfterUintCast(*it, needle, precision))
411             return true;
412     return false;
413 }
414 
containsAfterUintCast(const std::set<uint32_t> & haystack,int32_t needle,glu::Precision precision)415 static bool containsAfterUintCast(const std::set<uint32_t> &haystack, int32_t needle, glu::Precision precision)
416 {
417     for (std::set<uint32_t>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
418         if (intEqualsAfterUintCast(needle, *it, precision))
419             return true;
420     return false;
421 }
422 
423 class ShaderAtomicMinCase : public ShaderAtomicOpCase
424 {
425 public:
ShaderAtomicMinCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)426     ShaderAtomicMinCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
427                         Precision precision)
428         : ShaderAtomicOpCase(context, name, "atomicMin", operandType, type, precision, UVec3(3, 2, 1))
429     {
430         m_initialValue = m_precision == PRECISION_LOWP ? 100 : 1000;
431     }
432 
433 protected:
getInputs(int numValues,int stride,void * inputs) const434     void getInputs(int numValues, int stride, void *inputs) const
435     {
436         de::Random rnd(deStringHash(getName()));
437         const bool isSigned = m_type == TYPE_INT;
438         const int maxVal    = m_precision == PRECISION_LOWP ? 100 : 1000;
439         const int minVal    = isSigned ? -maxVal : 0;
440 
441         for (int valNdx = 0; valNdx < numValues; valNdx++)
442             *(int *)((uint8_t *)inputs + stride * valNdx) = rnd.getInt(minVal, maxVal);
443     }
444 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const445     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
446                 int groupStride, const void *groupOutputs) const
447     {
448         const int workGroupSize = (int)product(m_workGroupSize);
449         const int numWorkGroups = numValues / workGroupSize;
450         bool anyError           = false;
451 
452         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
453         {
454             const int groupOffset      = groupNdx * workGroupSize;
455             const uint32_t groupOutput = *(const uint32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
456             set<int32_t> inValues;
457             set<uint32_t> outValues;
458             int minValue = (int)m_initialValue;
459 
460             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
461             {
462                 const int32_t inputValue =
463                     *(const int32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
464                 inValues.insert(inputValue);
465                 minValue = de::min(inputValue, minValue);
466             }
467 
468             if (!intEqualsAfterUintCast(minValue, groupOutput, m_precision))
469             {
470                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected minimum "
471                                    << minValue << " (" << tcu::Format::Hex<8>((uint32_t)minValue) << ")"
472                                    << ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
473                                    << TestLog::EndMessage;
474                 anyError = true;
475             }
476 
477             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
478             {
479                 const uint32_t outputValue =
480                     *(const uint32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
481 
482                 if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
483                     !intEqualsAfterUintCast((int32_t)m_initialValue, outputValue, m_precision))
484                 {
485                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
486                                        << localNdx << ": found unexpected value " << outputValue << " ("
487                                        << tcu::Format::Hex<8>(outputValue) << ")" << TestLog::EndMessage;
488                     anyError = true;
489                 }
490 
491                 outValues.insert(outputValue);
492             }
493 
494             if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
495             {
496                 m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx
497                                    << TestLog::EndMessage;
498                 anyError = true;
499             }
500         }
501 
502         return !anyError;
503     }
504 };
505 
506 class ShaderAtomicMaxCase : public ShaderAtomicOpCase
507 {
508 public:
ShaderAtomicMaxCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)509     ShaderAtomicMaxCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
510                         Precision precision)
511         : ShaderAtomicOpCase(context, name, "atomicMax", operandType, type, precision, UVec3(3, 2, 1))
512     {
513         const bool isSigned = m_type == TYPE_INT;
514         m_initialValue      = isSigned ? (m_precision == PRECISION_LOWP ? -100 : -1000) : 0;
515     }
516 
517 protected:
getInputs(int numValues,int stride,void * inputs) const518     void getInputs(int numValues, int stride, void *inputs) const
519     {
520         de::Random rnd(deStringHash(getName()));
521         const bool isSigned = m_type == TYPE_INT;
522         const int maxVal    = m_precision == PRECISION_LOWP ? 100 : 1000;
523         const int minVal    = isSigned ? -maxVal : 0;
524 
525         for (int valNdx = 0; valNdx < numValues; valNdx++)
526             *(int *)((uint8_t *)inputs + stride * valNdx) = rnd.getInt(minVal, maxVal);
527     }
528 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const529     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
530                 int groupStride, const void *groupOutputs) const
531     {
532         const int workGroupSize = (int)product(m_workGroupSize);
533         const int numWorkGroups = numValues / workGroupSize;
534         bool anyError           = false;
535 
536         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
537         {
538             const int groupOffset      = groupNdx * workGroupSize;
539             const uint32_t groupOutput = *(const uint32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
540             set<int> inValues;
541             set<uint32_t> outValues;
542             int maxValue = (int)m_initialValue;
543 
544             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
545             {
546                 const int32_t inputValue =
547                     *(const int32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
548                 inValues.insert(inputValue);
549                 maxValue = de::max(maxValue, inputValue);
550             }
551 
552             if (!intEqualsAfterUintCast(maxValue, groupOutput, m_precision))
553             {
554                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected maximum "
555                                    << maxValue << " (" << tcu::Format::Hex<8>((uint32_t)maxValue) << ")"
556                                    << ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
557                                    << TestLog::EndMessage;
558                 anyError = true;
559             }
560 
561             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
562             {
563                 const uint32_t outputValue =
564                     *(const uint32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
565 
566                 if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
567                     !intEqualsAfterUintCast((int32_t)m_initialValue, outputValue, m_precision))
568                 {
569                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
570                                        << localNdx << ": found unexpected value " << outputValue << " ("
571                                        << tcu::Format::Hex<8>(outputValue) << ")" << TestLog::EndMessage;
572                     anyError = true;
573                 }
574 
575                 outValues.insert(outputValue);
576             }
577 
578             if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
579             {
580                 m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx
581                                    << TestLog::EndMessage;
582                 anyError = true;
583             }
584         }
585 
586         return !anyError;
587     }
588 };
589 
590 class ShaderAtomicAndCase : public ShaderAtomicOpCase
591 {
592 public:
ShaderAtomicAndCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)593     ShaderAtomicAndCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
594                         Precision precision)
595         : ShaderAtomicOpCase(context, name, "atomicAnd", operandType, type, precision, UVec3(3, 2, 1))
596     {
597         const int numBits        = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
598         const uint32_t valueMask = numBits == 32 ? ~0u : (1u << numBits) - 1u;
599         m_initialValue = ~((1u << (numBits - 1u)) | 1u) & valueMask; // All bits except lowest and highest set.
600     }
601 
602 protected:
getInputs(int numValues,int stride,void * inputs) const603     void getInputs(int numValues, int stride, void *inputs) const
604     {
605         de::Random rnd(deStringHash(getName()));
606         const int workGroupSize  = (int)product(m_workGroupSize);
607         const int numWorkGroups  = numValues / workGroupSize;
608         const int numBits        = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
609         const uint32_t valueMask = numBits == 32 ? ~0u : (1u << numBits) - 1u;
610 
611         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
612         {
613             const int groupOffset    = groupNdx * workGroupSize;
614             const uint32_t groupMask = 1 << rnd.getInt(0, numBits - 2); // One bit is always set.
615 
616             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
617                 *(uint32_t *)((uint8_t *)inputs + stride * (groupOffset + localNdx)) =
618                     (rnd.getUint32() & valueMask) | groupMask;
619         }
620     }
621 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const622     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
623                 int groupStride, const void *groupOutputs) const
624     {
625         const int workGroupSize    = (int)product(m_workGroupSize);
626         const int numWorkGroups    = numValues / workGroupSize;
627         const int numBits          = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
628         const uint32_t compareMask = (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u << numBits) - 1u;
629 
630         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
631         {
632             const int groupOffset      = groupNdx * workGroupSize;
633             const uint32_t groupOutput = *(const uint32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
634             uint32_t expectedValue     = m_initialValue;
635 
636             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
637             {
638                 const uint32_t inputValue =
639                     *(const uint32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
640                 expectedValue &= inputValue;
641             }
642 
643             if ((groupOutput & compareMask) != (expectedValue & compareMask))
644             {
645                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected "
646                                    << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput)
647                                    << TestLog::EndMessage;
648                 return false;
649             }
650 
651             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
652             {
653                 const uint32_t outputValue =
654                     *(const uint32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
655 
656                 if ((compareMask & (outputValue & ~m_initialValue)) != 0)
657                 {
658                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
659                                        << localNdx << ": found unexpected value " << tcu::toHex(outputValue)
660                                        << TestLog::EndMessage;
661                     return false;
662                 }
663             }
664         }
665 
666         return true;
667     }
668 };
669 
670 class ShaderAtomicOrCase : public ShaderAtomicOpCase
671 {
672 public:
ShaderAtomicOrCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)673     ShaderAtomicOrCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
674                        Precision precision)
675         : ShaderAtomicOpCase(context, name, "atomicOr", operandType, type, precision, UVec3(3, 2, 1))
676     {
677         m_initialValue = 1u; // Lowest bit set.
678     }
679 
680 protected:
getInputs(int numValues,int stride,void * inputs) const681     void getInputs(int numValues, int stride, void *inputs) const
682     {
683         de::Random rnd(deStringHash(getName()));
684         const int workGroupSize = (int)product(m_workGroupSize);
685         const int numWorkGroups = numValues / workGroupSize;
686         const int numBits       = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
687 
688         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
689         {
690             const int groupOffset = groupNdx * workGroupSize;
691 
692             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
693                 *(uint32_t *)((uint8_t *)inputs + stride * (groupOffset + localNdx)) = 1u << rnd.getInt(0, numBits - 1);
694         }
695     }
696 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const697     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
698                 int groupStride, const void *groupOutputs) const
699     {
700         const int workGroupSize    = (int)product(m_workGroupSize);
701         const int numWorkGroups    = numValues / workGroupSize;
702         const int numBits          = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
703         const uint32_t compareMask = (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u << numBits) - 1u;
704 
705         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
706         {
707             const int groupOffset      = groupNdx * workGroupSize;
708             const uint32_t groupOutput = *(const uint32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
709             uint32_t expectedValue     = m_initialValue;
710 
711             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
712             {
713                 const uint32_t inputValue =
714                     *(const uint32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
715                 expectedValue |= inputValue;
716             }
717 
718             if ((groupOutput & compareMask) != (expectedValue & compareMask))
719             {
720                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected "
721                                    << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput)
722                                    << TestLog::EndMessage;
723                 return false;
724             }
725 
726             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
727             {
728                 const uint32_t outputValue =
729                     *(const uint32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
730 
731                 if ((compareMask & (outputValue & m_initialValue)) == 0)
732                 {
733                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
734                                        << localNdx << ": found unexpected value " << tcu::toHex(outputValue)
735                                        << TestLog::EndMessage;
736                     return false;
737                 }
738             }
739         }
740 
741         return true;
742     }
743 };
744 
745 class ShaderAtomicXorCase : public ShaderAtomicOpCase
746 {
747 public:
ShaderAtomicXorCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)748     ShaderAtomicXorCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
749                         Precision precision)
750         : ShaderAtomicOpCase(context, name, "atomicXor", operandType, type, precision, UVec3(3, 2, 1))
751     {
752         m_initialValue = 0;
753     }
754 
755 protected:
getInputs(int numValues,int stride,void * inputs) const756     void getInputs(int numValues, int stride, void *inputs) const
757     {
758         de::Random rnd(deStringHash(getName()));
759         const int workGroupSize = (int)product(m_workGroupSize);
760         const int numWorkGroups = numValues / workGroupSize;
761 
762         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
763         {
764             const int groupOffset = groupNdx * workGroupSize;
765 
766             // First uses random bit-pattern.
767             *(uint32_t *)((uint8_t *)inputs + stride * (groupOffset)) = rnd.getUint32();
768 
769             // Rest have either all or no bits set.
770             for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
771                 *(uint32_t *)((uint8_t *)inputs + stride * (groupOffset + localNdx)) = rnd.getBool() ? ~0u : 0u;
772         }
773     }
774 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const775     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
776                 int groupStride, const void *groupOutputs) const
777     {
778         const int workGroupSize    = (int)product(m_workGroupSize);
779         const int numWorkGroups    = numValues / workGroupSize;
780         const int numBits          = m_precision == PRECISION_HIGHP ? 32 : m_precision == PRECISION_MEDIUMP ? 16 : 8;
781         const uint32_t compareMask = numBits == 32 ? ~0u : (1u << numBits) - 1u;
782 
783         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
784         {
785             const int groupOffset      = groupNdx * workGroupSize;
786             const uint32_t groupOutput = *(const uint32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
787             const uint32_t randomValue = *(const int32_t *)((const uint8_t *)inputs + inputStride * groupOffset);
788             const uint32_t expected0   = randomValue ^ 0u;
789             const uint32_t expected1   = randomValue ^ ~0u;
790             int numXorZeros            = (m_initialValue == 0) ? 1 : 0;
791 
792             for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
793             {
794                 const uint32_t inputValue =
795                     *(const uint32_t *)((const uint8_t *)inputs + inputStride * (groupOffset + localNdx));
796                 if (inputValue == 0)
797                     numXorZeros += 1;
798             }
799 
800             const uint32_t expected = (numXorZeros % 2 == 0) ? expected0 : expected1;
801 
802             if ((groupOutput & compareMask) != (expected & compareMask))
803             {
804                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected "
805                                    << tcu::toHex(expected0) << " or " << tcu::toHex(expected1) << " (compare mask "
806                                    << tcu::toHex(compareMask) << "), got " << tcu::toHex(groupOutput)
807                                    << TestLog::EndMessage;
808                 return false;
809             }
810 
811             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
812             {
813                 const uint32_t outputValue =
814                     *(const uint32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
815 
816                 if ((outputValue & compareMask) != 0 && (outputValue & compareMask) != compareMask &&
817                     (outputValue & compareMask) != (expected0 & compareMask) &&
818                     (outputValue & compareMask) != (expected1 & compareMask))
819                 {
820                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
821                                        << localNdx << ": found unexpected value " << tcu::toHex(outputValue)
822                                        << TestLog::EndMessage;
823                     return false;
824                 }
825             }
826         }
827 
828         return true;
829     }
830 };
831 
832 class ShaderAtomicExchangeCase : public ShaderAtomicOpCase
833 {
834 public:
ShaderAtomicExchangeCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)835     ShaderAtomicExchangeCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
836                              Precision precision)
837         : ShaderAtomicOpCase(context, name, "atomicExchange", operandType, type, precision, UVec3(3, 2, 1))
838     {
839         m_initialValue = 0;
840     }
841 
842 protected:
getInputs(int numValues,int stride,void * inputs) const843     void getInputs(int numValues, int stride, void *inputs) const
844     {
845         const int workGroupSize = (int)product(m_workGroupSize);
846         const int numWorkGroups = numValues / workGroupSize;
847 
848         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
849         {
850             const int groupOffset = groupNdx * workGroupSize;
851 
852             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
853                 *(int *)((uint8_t *)inputs + stride * (groupOffset + localNdx)) = localNdx + 1;
854         }
855     }
856 
verify(int numValues,int inputStride,const void * inputs,int outputStride,const void * outputs,int groupStride,const void * groupOutputs) const857     bool verify(int numValues, int inputStride, const void *inputs, int outputStride, const void *outputs,
858                 int groupStride, const void *groupOutputs) const
859     {
860         const int workGroupSize = (int)product(m_workGroupSize);
861         const int numWorkGroups = numValues / workGroupSize;
862 
863         DE_UNREF(inputStride && inputs);
864 
865         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
866         {
867             const int groupOffset = groupNdx * workGroupSize;
868             const int groupOutput = *(const int32_t *)((const uint8_t *)groupOutputs + groupNdx * groupStride);
869             set<int> usedValues;
870 
871             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
872             {
873                 const int outputValue =
874                     *(const int32_t *)((const uint8_t *)outputs + outputStride * (groupOffset + localNdx));
875 
876                 if (!de::inRange(outputValue, 0, workGroupSize) || usedValues.find(outputValue) != usedValues.end())
877                 {
878                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
879                                        << localNdx << ": found unexpected value " << outputValue << TestLog::EndMessage;
880                     return false;
881                 }
882                 usedValues.insert(outputValue);
883             }
884 
885             if (!de::inRange(groupOutput, 0, workGroupSize) || usedValues.find(groupOutput) != usedValues.end())
886             {
887                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": unexpected final value"
888                                    << groupOutput << TestLog::EndMessage;
889                 return false;
890             }
891         }
892 
893         return true;
894     }
895 };
896 
897 class ShaderAtomicCompSwapCase : public TestCase
898 {
899 public:
900     ShaderAtomicCompSwapCase(Context &context, const char *name, AtomicOperandType operandType, DataType type,
901                              Precision precision);
902     ~ShaderAtomicCompSwapCase(void);
903 
904     void init(void);
905     void deinit(void);
906     IterateResult iterate(void);
907 
908 protected:
909 private:
910     ShaderAtomicCompSwapCase(const ShaderAtomicCompSwapCase &other);
911     ShaderAtomicCompSwapCase &operator=(const ShaderAtomicCompSwapCase &other);
912 
913     const AtomicOperandType m_operandType;
914     const DataType m_type;
915     const Precision m_precision;
916 
917     const UVec3 m_workGroupSize;
918     const UVec3 m_numWorkGroups;
919 
920     ShaderProgram *m_program;
921 };
922 
ShaderAtomicCompSwapCase(Context & context,const char * name,AtomicOperandType operandType,DataType type,Precision precision)923 ShaderAtomicCompSwapCase::ShaderAtomicCompSwapCase(Context &context, const char *name, AtomicOperandType operandType,
924                                                    DataType type, Precision precision)
925     : TestCase(context, name, "atomicCompSwap() Test")
926     , m_operandType(operandType)
927     , m_type(type)
928     , m_precision(precision)
929     , m_workGroupSize(3, 2, 1)
930     , m_numWorkGroups(4, 4, 4)
931     , m_program(DE_NULL)
932 {
933 }
934 
~ShaderAtomicCompSwapCase(void)935 ShaderAtomicCompSwapCase::~ShaderAtomicCompSwapCase(void)
936 {
937     ShaderAtomicCompSwapCase::deinit();
938 }
939 
init(void)940 void ShaderAtomicCompSwapCase::init(void)
941 {
942     const bool isSSBO        = m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
943     const char *precName     = getPrecisionName(m_precision);
944     const char *typeName     = getDataTypeName(m_type);
945     const uint32_t numValues = product(m_workGroupSize) * product(m_numWorkGroups);
946     std::ostringstream src;
947 
948     src << "#version 310 es\n"
949         << "layout(local_size_x = " << m_workGroupSize.x() << ", local_size_y = " << m_workGroupSize.y()
950         << ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
951         << "layout(binding = 0) buffer InOut\n"
952         << "{\n"
953         << "    " << precName << " " << typeName << " compareValues[" << numValues << "];\n"
954         << "    " << precName << " " << typeName << " exchangeValues[" << numValues << "];\n"
955         << "    " << precName << " " << typeName << " outputValues[" << numValues << "];\n"
956         << "    " << (isSSBO ? "coherent " : "") << precName << " " << typeName << " groupValues["
957         << product(m_numWorkGroups) << "];\n"
958         << "} sb_inout;\n";
959 
960     if (!isSSBO)
961         src << "shared " << precName << " " << typeName << " s_var;\n";
962 
963     src << "\n"
964         << "void main (void)\n"
965         << "{\n"
966         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
967         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
968            "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
969         << "    uint globalOffs = localSize*globalNdx;\n"
970         << "    uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
971         << "\n";
972 
973     if (!isSSBO)
974     {
975         src << "    if (gl_LocalInvocationIndex == 0u)\n"
976             << "        s_var = " << typeName << "(" << 0 << ");\n"
977             << "\n";
978     }
979 
980     src << "    " << precName << " " << typeName << " compare = sb_inout.compareValues[offset];\n"
981         << "    " << precName << " " << typeName << " exchange = sb_inout.exchangeValues[offset];\n"
982         << "    " << precName << " " << typeName << " result;\n"
983         << "    bool swapDone = false;\n"
984         << "\n"
985         << "    for (uint ndx = 0u; ndx < localSize; ndx++)\n"
986         << "    {\n"
987         << "        barrier();\n"
988         << "        if (!swapDone)\n"
989         << "        {\n"
990         << "            result = atomicCompSwap(" << (isSSBO ? "sb_inout.groupValues[globalNdx]" : "s_var")
991         << ", compare, exchange);\n"
992         << "            if (result == compare)\n"
993         << "                swapDone = true;\n"
994         << "        }\n"
995         << "    }\n"
996         << "\n"
997         << "    sb_inout.outputValues[offset] = result;\n";
998 
999     if (!isSSBO)
1000     {
1001         src << "    barrier();\n"
1002             << "    if (gl_LocalInvocationIndex == 0u)\n"
1003             << "        sb_inout.groupValues[globalNdx] = s_var;\n";
1004     }
1005 
1006     src << "}\n";
1007 
1008     DE_ASSERT(!m_program);
1009     m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1010 
1011     m_testCtx.getLog() << *m_program;
1012 
1013     if (!m_program->isOk())
1014     {
1015         delete m_program;
1016         m_program = DE_NULL;
1017         throw tcu::TestError("Compile failed");
1018     }
1019 }
1020 
deinit(void)1021 void ShaderAtomicCompSwapCase::deinit(void)
1022 {
1023     delete m_program;
1024     m_program = DE_NULL;
1025 }
1026 
iterate(void)1027 ShaderAtomicOpCase::IterateResult ShaderAtomicCompSwapCase::iterate(void)
1028 {
1029     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1030     const uint32_t program   = m_program->getProgram();
1031     const Buffer inoutBuffer(m_context.getRenderContext());
1032     const uint32_t blockNdx            = gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
1033     const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
1034     const uint32_t cmpVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.compareValues[0]");
1035     const InterfaceVariableInfo cmpVarInfo =
1036         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, cmpVarNdx);
1037     const uint32_t exhVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.exchangeValues[0]");
1038     const InterfaceVariableInfo exhVarInfo =
1039         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, exhVarNdx);
1040     const uint32_t outVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
1041     const InterfaceVariableInfo outVarInfo =
1042         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
1043     const uint32_t groupVarNdx = gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
1044     const InterfaceVariableInfo groupVarInfo =
1045         getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
1046     const uint32_t numValues = product(m_workGroupSize) * product(m_numWorkGroups);
1047 
1048     TCU_CHECK(cmpVarInfo.arraySize == numValues && exhVarInfo.arraySize == numValues &&
1049               outVarInfo.arraySize == numValues && groupVarInfo.arraySize == product(m_numWorkGroups));
1050 
1051     gl.useProgram(program);
1052 
1053     // \todo [2013-09-05 pyry] Use randomized input values!
1054 
1055     // Setup buffer.
1056     {
1057         const uint32_t workGroupSize = product(m_workGroupSize);
1058         vector<uint8_t> bufData(blockInfo.dataSize);
1059 
1060         std::fill(bufData.begin(), bufData.end(), 0);
1061 
1062         for (uint32_t ndx = 0; ndx < numValues; ndx++)
1063             *(uint32_t *)(&bufData[0] + cmpVarInfo.offset + cmpVarInfo.arrayStride * ndx) = ndx % workGroupSize;
1064 
1065         for (uint32_t ndx = 0; ndx < numValues; ndx++)
1066             *(uint32_t *)(&bufData[0] + exhVarInfo.offset + exhVarInfo.arrayStride * ndx) = (ndx % workGroupSize) + 1;
1067 
1068         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
1069         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
1070         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
1071         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1072     }
1073 
1074     gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
1075 
1076     // Read back and compare
1077     {
1078         const void *resPtr      = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
1079         const int numWorkGroups = (int)product(m_numWorkGroups);
1080         const int workGroupSize = (int)product(m_workGroupSize);
1081         bool isOk               = true;
1082 
1083         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1084         TCU_CHECK(resPtr);
1085 
1086         for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
1087         {
1088             const int groupOffset = groupNdx * workGroupSize;
1089             const int groupOutput =
1090                 *(const int32_t *)((const uint8_t *)resPtr + groupVarInfo.offset + groupNdx * groupVarInfo.arrayStride);
1091 
1092             for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
1093             {
1094                 const int refValue    = localNdx;
1095                 const int outputValue = *(const int32_t *)((const uint8_t *)resPtr + outVarInfo.offset +
1096                                                            outVarInfo.arrayStride * (groupOffset + localNdx));
1097 
1098                 if (outputValue != refValue)
1099                 {
1100                     m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation "
1101                                        << localNdx << ": expected " << refValue << ", got " << outputValue
1102                                        << TestLog::EndMessage;
1103                     isOk = false;
1104                     break;
1105                 }
1106             }
1107 
1108             if (groupOutput != workGroupSize)
1109             {
1110                 m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected"
1111                                    << workGroupSize << ", got " << groupOutput << TestLog::EndMessage;
1112                 isOk = false;
1113                 break;
1114             }
1115         }
1116 
1117         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1118         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1119 
1120         m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS : QP_TEST_RESULT_FAIL, isOk ? "Pass" : "Comparison failed");
1121     }
1122 
1123     return STOP;
1124 }
1125 
ShaderAtomicOpTests(Context & context,const char * name,AtomicOperandType operandType)1126 ShaderAtomicOpTests::ShaderAtomicOpTests(Context &context, const char *name, AtomicOperandType operandType)
1127     : TestCaseGroup(context, name, "Atomic Operation Tests")
1128     , m_operandType(operandType)
1129 {
1130 }
1131 
~ShaderAtomicOpTests(void)1132 ShaderAtomicOpTests::~ShaderAtomicOpTests(void)
1133 {
1134 }
1135 
1136 template <typename T>
createAtomicOpGroup(Context & context,AtomicOperandType operandType,const char * groupName)1137 static tcu::TestCaseGroup *createAtomicOpGroup(Context &context, AtomicOperandType operandType, const char *groupName)
1138 {
1139     tcu::TestCaseGroup *const group =
1140         new tcu::TestCaseGroup(context.getTestContext(), groupName, (string("Atomic ") + groupName).c_str());
1141     try
1142     {
1143         for (int precNdx = 0; precNdx < PRECISION_LAST; precNdx++)
1144         {
1145             for (int typeNdx = 0; typeNdx < 2; typeNdx++)
1146             {
1147                 const Precision precision = Precision(precNdx);
1148                 const DataType type       = typeNdx > 0 ? TYPE_INT : TYPE_UINT;
1149                 const string caseName     = string(getPrecisionName(precision)) + "_" + getDataTypeName(type);
1150 
1151                 group->addChild(new T(context, caseName.c_str(), operandType, type, precision));
1152             }
1153         }
1154 
1155         return group;
1156     }
1157     catch (...)
1158     {
1159         delete group;
1160         throw;
1161     }
1162 }
1163 
init(void)1164 void ShaderAtomicOpTests::init(void)
1165 {
1166     addChild(createAtomicOpGroup<ShaderAtomicAddCase>(m_context, m_operandType, "add"));
1167     addChild(createAtomicOpGroup<ShaderAtomicMinCase>(m_context, m_operandType, "min"));
1168     addChild(createAtomicOpGroup<ShaderAtomicMaxCase>(m_context, m_operandType, "max"));
1169     addChild(createAtomicOpGroup<ShaderAtomicAndCase>(m_context, m_operandType, "and"));
1170     addChild(createAtomicOpGroup<ShaderAtomicOrCase>(m_context, m_operandType, "or"));
1171     addChild(createAtomicOpGroup<ShaderAtomicXorCase>(m_context, m_operandType, "xor"));
1172     addChild(createAtomicOpGroup<ShaderAtomicExchangeCase>(m_context, m_operandType, "exchange"));
1173     addChild(createAtomicOpGroup<ShaderAtomicCompSwapCase>(m_context, m_operandType, "compswap"));
1174 }
1175 
1176 } // namespace Functional
1177 } // namespace gles31
1178 } // namespace deqp
1179