xref: /aosp_15_r20/external/deqp/modules/gles31/functional/es31fBasicComputeShaderTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Basic Compute Shader Tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fBasicComputeShaderTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluObjectWrapper.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluProgramInterfaceQuery.hpp"
29 #include "gluContextInfo.hpp"
30 #include "glwFunctions.hpp"
31 #include "glwEnums.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36 
37 namespace deqp
38 {
39 namespace gles31
40 {
41 namespace Functional
42 {
43 
44 using std::string;
45 using std::vector;
46 using tcu::TestLog;
47 using namespace glu;
48 
49 //! Utility for mapping buffers.
50 class BufferMemMap
51 {
52 public:
BufferMemMap(const glw::Functions & gl,uint32_t target,int offset,int size,uint32_t access)53     BufferMemMap(const glw::Functions &gl, uint32_t target, int offset, int size, uint32_t access)
54         : m_gl(gl)
55         , m_target(target)
56         , m_ptr(DE_NULL)
57     {
58         m_ptr = gl.mapBufferRange(target, offset, size, access);
59         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
60         TCU_CHECK(m_ptr);
61     }
62 
~BufferMemMap(void)63     ~BufferMemMap(void)
64     {
65         m_gl.unmapBuffer(m_target);
66     }
67 
getPtr(void) const68     void *getPtr(void) const
69     {
70         return m_ptr;
71     }
operator *(void) const72     void *operator*(void) const
73     {
74         return m_ptr;
75     }
76 
77 private:
78     BufferMemMap(const BufferMemMap &other);
79     BufferMemMap &operator=(const BufferMemMap &other);
80 
81     const glw::Functions &m_gl;
82     const uint32_t m_target;
83     void *m_ptr;
84 };
85 
86 namespace
87 {
88 
89 class EmptyComputeShaderCase : public TestCase
90 {
91 public:
EmptyComputeShaderCase(Context & context)92     EmptyComputeShaderCase(Context &context) : TestCase(context, "empty", "Empty shader")
93     {
94     }
95 
iterate(void)96     IterateResult iterate(void)
97     {
98         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
99         std::ostringstream src;
100 
101         src << getGLSLVersionDeclaration(glslVersion) << "\n"
102             << "layout (local_size_x = 1) in;\n"
103                "void main (void) {}\n";
104 
105         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
106                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
107 
108         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
109 
110         m_testCtx.getLog() << program;
111         if (!program.isOk())
112             TCU_FAIL("Compile failed");
113 
114         gl.useProgram(program.getProgram());
115         gl.dispatchCompute(1, 1, 1);
116         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
117 
118         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
119         return STOP;
120     }
121 };
122 
123 class UBOToSSBOInvertCase : public TestCase
124 {
125 public:
UBOToSSBOInvertCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)126     UBOToSSBOInvertCase(Context &context, const char *name, const char *description, int numValues,
127                         const tcu::IVec3 &localSize, const tcu::IVec3 &workSize)
128         : TestCase(context, name, description)
129         , m_numValues(numValues)
130         , m_localSize(localSize)
131         , m_workSize(workSize)
132     {
133         DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] *
134                                  m_localSize[2]) ==
135                   0);
136     }
137 
iterate(void)138     IterateResult iterate(void)
139     {
140         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
141         std::ostringstream src;
142 
143         src << getGLSLVersionDeclaration(glslVersion) << "\n"
144             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
145             << ", local_size_z = " << m_localSize[2] << ") in;\n"
146             << "uniform Input {\n"
147             << "    uint values[" << m_numValues << "];\n"
148             << "} ub_in;\n"
149             << "layout(binding = 1) buffer Output {\n"
150             << "    uint values[" << m_numValues << "];\n"
151             << "} sb_out;\n"
152             << "void main (void) {\n"
153             << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
154             << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
155             << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
156                "gl_GlobalInvocationID.x;\n"
157             << "    uint offset          = numValuesPerInv*groupNdx;\n"
158             << "\n"
159             << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
160             << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
161             << "}\n";
162 
163         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
164         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
165                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
166         const Buffer inputBuffer(m_context.getRenderContext());
167         const Buffer outputBuffer(m_context.getRenderContext());
168         std::vector<uint32_t> inputValues(m_numValues);
169 
170         // Compute input values.
171         {
172             de::Random rnd(0x111223f);
173             for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
174                 inputValues[ndx] = rnd.getUint32();
175         }
176 
177         m_testCtx.getLog() << program;
178         if (!program.isOk())
179             TCU_FAIL("Compile failed");
180 
181         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
182 
183         gl.useProgram(program.getProgram());
184 
185         // Input buffer setup
186         {
187             const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
188             const InterfaceBlockInfo blockInfo =
189                 getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
190             const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
191             const InterfaceVariableInfo valueInfo =
192                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
193 
194             gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
195             gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
196 
197             {
198                 const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
199 
200                 for (uint32_t ndx = 0; ndx < de::min(valueInfo.arraySize, (uint32_t)inputValues.size()); ndx++)
201                     *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) =
202                         inputValues[ndx];
203             }
204 
205             gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
206             gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
207             GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
208         }
209 
210         // Output buffer setup
211         {
212             const uint32_t blockIndex =
213                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
214             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
215                                                         GL_BUFFER_DATA_SIZE);
216 
217             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
218             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
219             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
220             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
221         }
222 
223         // Dispatch compute workload
224         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
225         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
226 
227         // Read back and compare
228         {
229             const uint32_t blockIndex =
230                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
231             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
232                                                         GL_BUFFER_DATA_SIZE);
233             const uint32_t valueIndex =
234                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
235             const InterfaceVariableInfo valueInfo =
236                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
237             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
238 
239             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
240             for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++)
241             {
242                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
243                                                           valueInfo.arrayStride * ndx));
244                 const uint32_t ref = ~inputValues[ndx];
245 
246                 if (res != ref)
247                     throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
248             }
249         }
250 
251         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
252         return STOP;
253     }
254 
255 private:
256     const int m_numValues;
257     const tcu::IVec3 m_localSize;
258     const tcu::IVec3 m_workSize;
259 };
260 
261 class CopyInvertSSBOCase : public TestCase
262 {
263 public:
CopyInvertSSBOCase(Context & context,const char * name,const char * description,int numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)264     CopyInvertSSBOCase(Context &context, const char *name, const char *description, int numValues,
265                        const tcu::IVec3 &localSize, const tcu::IVec3 &workSize)
266         : TestCase(context, name, description)
267         , m_numValues(numValues)
268         , m_localSize(localSize)
269         , m_workSize(workSize)
270     {
271         DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] *
272                                  m_localSize[2]) ==
273                   0);
274     }
275 
iterate(void)276     IterateResult iterate(void)
277     {
278         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
279         std::ostringstream src;
280 
281         src << getGLSLVersionDeclaration(glslVersion) << "\n"
282             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
283             << ", local_size_z = " << m_localSize[2] << ") in;\n"
284             << "layout(binding = 0) buffer Input {\n"
285             << "    uint values[" << m_numValues << "];\n"
286             << "} sb_in;\n"
287             << "layout (binding = 1) buffer Output {\n"
288             << "    uint values[" << m_numValues << "];\n"
289             << "} sb_out;\n"
290             << "void main (void) {\n"
291             << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
292             << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
293             << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
294                "gl_GlobalInvocationID.x;\n"
295             << "    uint offset          = numValuesPerInv*groupNdx;\n"
296             << "\n"
297             << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
298             << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
299             << "}\n";
300 
301         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
302         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
303                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
304         const Buffer inputBuffer(m_context.getRenderContext());
305         const Buffer outputBuffer(m_context.getRenderContext());
306         std::vector<uint32_t> inputValues(m_numValues);
307 
308         // Compute input values.
309         {
310             de::Random rnd(0x124fef);
311             for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
312                 inputValues[ndx] = rnd.getUint32();
313         }
314 
315         m_testCtx.getLog() << program;
316         if (!program.isOk())
317             TCU_FAIL("Compile failed");
318 
319         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
320 
321         gl.useProgram(program.getProgram());
322 
323         // Input buffer setup
324         {
325             const uint32_t blockIndex =
326                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
327             const InterfaceBlockInfo blockInfo =
328                 getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
329             const uint32_t valueIndex =
330                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
331             const InterfaceVariableInfo valueInfo =
332                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
333 
334             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
335             gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
336 
337             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
338 
339             {
340                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
341 
342                 for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
343                     *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) =
344                         inputValues[ndx];
345             }
346 
347             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
348             GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
349         }
350 
351         // Output buffer setup
352         {
353             const uint32_t blockIndex =
354                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
355             const InterfaceBlockInfo blockInfo =
356                 getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
357 
358             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
359             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
360             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
361             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
362         }
363 
364         // Dispatch compute workload
365         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
366         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
367 
368         // Read back and compare
369         {
370             const uint32_t blockIndex =
371                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
372             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
373                                                         GL_BUFFER_DATA_SIZE);
374             const uint32_t valueIndex =
375                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
376             const InterfaceVariableInfo valueInfo =
377                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
378             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
379 
380             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
381             for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++)
382             {
383                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
384                                                           valueInfo.arrayStride * ndx));
385                 const uint32_t ref = ~inputValues[ndx];
386 
387                 if (res != ref)
388                     throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
389             }
390         }
391 
392         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
393         return STOP;
394     }
395 
396 private:
397     const int m_numValues;
398     const tcu::IVec3 m_localSize;
399     const tcu::IVec3 m_workSize;
400 };
401 
402 class InvertSSBOInPlaceCase : public TestCase
403 {
404 public:
InvertSSBOInPlaceCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)405     InvertSSBOInPlaceCase(Context &context, const char *name, const char *description, int numValues, bool isSized,
406                           const tcu::IVec3 &localSize, const tcu::IVec3 &workSize)
407         : TestCase(context, name, description)
408         , m_numValues(numValues)
409         , m_isSized(isSized)
410         , m_localSize(localSize)
411         , m_workSize(workSize)
412     {
413         DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] *
414                                  m_localSize[2]) ==
415                   0);
416     }
417 
iterate(void)418     IterateResult iterate(void)
419     {
420         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
421         std::ostringstream src;
422 
423         src << getGLSLVersionDeclaration(glslVersion) << "\n"
424             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
425             << ", local_size_z = " << m_localSize[2] << ") in;\n"
426             << "layout(binding = 0) buffer InOut {\n"
427             << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
428             << "} sb_inout;\n"
429             << "void main (void) {\n"
430             << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
431             << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
432             << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
433                "gl_GlobalInvocationID.x;\n"
434             << "    uint offset          = numValuesPerInv*groupNdx;\n"
435             << "\n"
436             << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
437             << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
438             << "}\n";
439 
440         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
441         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
442                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
443 
444         m_testCtx.getLog() << program;
445         if (!program.isOk())
446             TCU_FAIL("Compile failed");
447 
448         const Buffer outputBuffer(m_context.getRenderContext());
449         const uint32_t valueIndex =
450             gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
451         const InterfaceVariableInfo valueInfo =
452             getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
453         const uint32_t blockSize = valueInfo.arrayStride * (uint32_t)m_numValues;
454         std::vector<uint32_t> inputValues(m_numValues);
455 
456         // Compute input values.
457         {
458             de::Random rnd(0x82ce7f);
459             for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
460                 inputValues[ndx] = rnd.getUint32();
461         }
462 
463         TCU_CHECK(valueInfo.arraySize == (uint32_t)(m_isSized ? m_numValues : 0));
464 
465         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
466 
467         gl.useProgram(program.getProgram());
468 
469         // Output buffer setup
470         {
471             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
472             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
473 
474             {
475                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
476 
477                 for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
478                     *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) =
479                         inputValues[ndx];
480             }
481 
482             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
483             GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
484         }
485 
486         // Dispatch compute workload
487         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
488         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
489 
490         // Read back and compare
491         {
492             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
493 
494             for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
495             {
496                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
497                                                           valueInfo.arrayStride * ndx));
498                 const uint32_t ref = ~inputValues[ndx];
499 
500                 if (res != ref)
501                     throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
502             }
503         }
504 
505         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
506         return STOP;
507     }
508 
509 private:
510     const int m_numValues;
511     const bool m_isSized;
512     const tcu::IVec3 m_localSize;
513     const tcu::IVec3 m_workSize;
514 };
515 
516 class WriteToMultipleSSBOCase : public TestCase
517 {
518 public:
WriteToMultipleSSBOCase(Context & context,const char * name,const char * description,int numValues,bool isSized,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)519     WriteToMultipleSSBOCase(Context &context, const char *name, const char *description, int numValues, bool isSized,
520                             const tcu::IVec3 &localSize, const tcu::IVec3 &workSize)
521         : TestCase(context, name, description)
522         , m_numValues(numValues)
523         , m_isSized(isSized)
524         , m_localSize(localSize)
525         , m_workSize(workSize)
526     {
527         DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] *
528                                  m_localSize[2]) ==
529                   0);
530     }
531 
iterate(void)532     IterateResult iterate(void)
533     {
534         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
535         std::ostringstream src;
536 
537         src << getGLSLVersionDeclaration(glslVersion) << "\n"
538             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
539             << ", local_size_z = " << m_localSize[2] << ") in;\n"
540             << "layout(binding = 0) buffer Out0 {\n"
541             << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
542             << "} sb_out0;\n"
543             << "layout(binding = 1) buffer Out1 {\n"
544             << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
545             << "} sb_out1;\n"
546             << "void main (void) {\n"
547             << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
548             << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
549                "gl_GlobalInvocationID.x;\n"
550             << "\n"
551             << "    {\n"
552             << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
553             << "        uint offset          = numValuesPerInv*groupNdx;\n"
554             << "\n"
555             << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
556             << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
557             << "    }\n"
558             << "    {\n"
559             << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
560             << "        uint offset          = numValuesPerInv*groupNdx;\n"
561             << "\n"
562             << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
563             << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
564             << "    }\n"
565             << "}\n";
566 
567         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
568         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
569                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
570 
571         m_testCtx.getLog() << program;
572         if (!program.isOk())
573             TCU_FAIL("Compile failed");
574 
575         const Buffer outputBuffer0(m_context.getRenderContext());
576         const uint32_t value0Index =
577             gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
578         const InterfaceVariableInfo value0Info =
579             getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
580         const uint32_t block0Size = value0Info.arrayStride * (uint32_t)m_numValues;
581 
582         const Buffer outputBuffer1(m_context.getRenderContext());
583         const uint32_t value1Index =
584             gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
585         const InterfaceVariableInfo value1Info =
586             getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
587         const uint32_t block1Size = value1Info.arrayStride * (uint32_t)m_numValues;
588 
589         TCU_CHECK(value0Info.arraySize == (uint32_t)(m_isSized ? m_numValues : 0));
590         TCU_CHECK(value1Info.arraySize == (uint32_t)(m_isSized ? m_numValues : 0));
591 
592         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
593 
594         gl.useProgram(program.getProgram());
595 
596         // Output buffer setup
597         {
598             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
599             gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
600 
601             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
602             GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
603         }
604         {
605             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
606             gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
607 
608             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
609             GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
610         }
611 
612         // Dispatch compute workload
613         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
614         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
615 
616         // Read back and compare
617         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
618         {
619             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
620 
621             for (uint32_t ndx = 0; ndx < (uint32_t)m_numValues; ndx++)
622             {
623                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + value0Info.offset +
624                                                           value0Info.arrayStride * ndx));
625                 const uint32_t ref = ndx;
626 
627                 if (res != ref)
628                     throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) +
629                                          "] res=" + de::toString(res) + " ref=" + de::toString(ref));
630             }
631         }
632         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
633         {
634             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
635 
636             for (uint32_t ndx = 0; ndx < (uint32_t)m_numValues; ndx++)
637             {
638                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + value1Info.offset +
639                                                           value1Info.arrayStride * ndx));
640                 const uint32_t ref = m_numValues - ndx;
641 
642                 if (res != ref)
643                     throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) +
644                                          "] res=" + de::toString(res) + " ref=" + de::toString(ref));
645             }
646         }
647         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
648         return STOP;
649     }
650 
651 private:
652     const int m_numValues;
653     const bool m_isSized;
654     const tcu::IVec3 m_localSize;
655     const tcu::IVec3 m_workSize;
656 };
657 
658 class SSBOLocalBarrierCase : public TestCase
659 {
660 public:
SSBOLocalBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)661     SSBOLocalBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize,
662                          const tcu::IVec3 &workSize)
663         : TestCase(context, name, description)
664         , m_localSize(localSize)
665         , m_workSize(workSize)
666     {
667     }
668 
iterate(void)669     IterateResult iterate(void)
670     {
671         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
672         const Buffer outputBuffer(m_context.getRenderContext());
673         const int workGroupSize  = m_localSize[0] * m_localSize[1] * m_localSize[2];
674         const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2];
675         const int numValues      = workGroupSize * workGroupCount;
676 
677         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
678         std::ostringstream src;
679 
680         src << getGLSLVersionDeclaration(glslVersion) << "\n"
681             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
682             << ", local_size_z = " << m_localSize[2] << ") in;\n"
683             << "layout(binding = 0) buffer Output {\n"
684             << "    coherent uint values[" << numValues << "];\n"
685             << "} sb_out;\n\n"
686             << "shared uint offsets[" << workGroupSize << "];\n\n"
687             << "void main (void) {\n"
688             << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
689             << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
690                "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
691             << "    uint globalOffs = localSize*globalNdx;\n"
692             << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
693                "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
694             << "\n"
695             << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
696             << "    memoryBarrierBuffer();\n"
697             << "    barrier();\n"
698             << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
699             << "    memoryBarrierBuffer();\n"
700             << "    barrier();\n"
701             << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
702             << "}\n";
703 
704         const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
705 
706         m_testCtx.getLog() << program;
707         if (!program.isOk())
708             TCU_FAIL("Compile failed");
709 
710         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
711 
712         gl.useProgram(program.getProgram());
713 
714         // Output buffer setup
715         {
716             const uint32_t blockIndex =
717                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
718             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
719                                                         GL_BUFFER_DATA_SIZE);
720 
721             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
722             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
723             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
724             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
725         }
726 
727         // Dispatch compute workload
728         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
729         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
730 
731         // Read back and compare
732         {
733             const uint32_t blockIndex =
734                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
735             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
736                                                         GL_BUFFER_DATA_SIZE);
737             const uint32_t valueIndex =
738                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
739             const InterfaceVariableInfo valueInfo =
740                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
741             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
742 
743             for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
744             {
745                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
746                 {
747                     const int globalOffs = groupNdx * workGroupSize;
748                     const uint32_t res   = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
749                                                               valueInfo.arrayStride * (globalOffs + localOffs)));
750                     const int offs0      = localOffs - 1 < 0 ? ((localOffs + workGroupSize - 1) % workGroupSize) :
751                                                                ((localOffs - 1) % workGroupSize);
752                     const int offs1      = localOffs - 2 < 0 ? ((localOffs + workGroupSize - 2) % workGroupSize) :
753                                                                ((localOffs - 2) % workGroupSize);
754                     const uint32_t ref   = (uint32_t)(globalOffs + offs0 + offs1);
755 
756                     if (res != ref)
757                         throw tcu::TestError(string("Comparison failed for Output.values[") +
758                                              de::toString(globalOffs + localOffs) + "]");
759                 }
760             }
761         }
762 
763         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
764         return STOP;
765     }
766 
767 private:
768     const tcu::IVec3 m_localSize;
769     const tcu::IVec3 m_workSize;
770 };
771 
772 class SSBOBarrierCase : public TestCase
773 {
774 public:
SSBOBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec3 & workSize)775     SSBOBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec3 &workSize)
776         : TestCase(context, name, description)
777         , m_workSize(workSize)
778     {
779     }
780 
iterate(void)781     IterateResult iterate(void)
782     {
783         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
784         const char *const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion);
785 
786         std::ostringstream src0;
787         src0 << glslVersionDeclaration << "\n"
788              << "layout (local_size_x = 1) in;\n"
789                 "uniform uint u_baseVal;\n"
790                 "layout(binding = 1) buffer Output {\n"
791                 "    uint values[];\n"
792                 "};\n"
793                 "void main (void) {\n"
794                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
795                 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
796                 "    values[offset] = u_baseVal+offset;\n"
797                 "}\n";
798 
799         std::ostringstream src1;
800         src1 << glslVersionDeclaration << "\n"
801              << "layout (local_size_x = 1) in;\n"
802                 "uniform uint u_baseVal;\n"
803                 "layout(binding = 1) buffer Input {\n"
804                 "    uint values[];\n"
805                 "};\n"
806                 "layout(binding = 0) buffer Output {\n"
807                 "    coherent uint sum;\n"
808                 "};\n"
809                 "void main (void) {\n"
810                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
811                 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
812                 "    uint value  = values[offset];\n"
813                 "    atomicAdd(sum, value);\n"
814                 "}\n";
815 
816         const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
817         const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
818 
819         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
820         const Buffer tempBuffer(m_context.getRenderContext());
821         const Buffer outputBuffer(m_context.getRenderContext());
822         const uint32_t baseValue = 127;
823 
824         m_testCtx.getLog() << program0 << program1;
825         if (!program0.isOk() || !program1.isOk())
826             TCU_FAIL("Compile failed");
827 
828         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
829 
830         // Temp buffer setup
831         {
832             const uint32_t valueIndex =
833                 gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
834             const InterfaceVariableInfo valueInfo =
835                 getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
836             const uint32_t bufferSize = valueInfo.arrayStride * m_workSize[0] * m_workSize[1] * m_workSize[2];
837 
838             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
839             gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
840             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
841             GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
842         }
843 
844         // Output buffer setup
845         {
846             const uint32_t blockIndex =
847                 gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
848             const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
849                                                         GL_BUFFER_DATA_SIZE);
850 
851             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
852             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
853 
854             {
855                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
856                 deMemset(bufMap.getPtr(), 0, blockSize);
857             }
858 
859             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
860             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
861         }
862 
863         // Dispatch compute workload
864         gl.useProgram(program0.getProgram());
865         gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
866         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
867         gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
868         gl.useProgram(program1.getProgram());
869         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
870         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
871 
872         // Read back and compare
873         {
874             const uint32_t blockIndex =
875                 gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
876             const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
877                                                         GL_BUFFER_DATA_SIZE);
878             const uint32_t valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
879             const InterfaceVariableInfo valueInfo =
880                 getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
881             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
882 
883             const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset));
884             uint32_t ref       = 0;
885 
886             for (int ndx = 0; ndx < m_workSize[0] * m_workSize[1] * m_workSize[2]; ndx++)
887                 ref += baseValue + (uint32_t)ndx;
888 
889             if (res != ref)
890             {
891                 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got "
892                                    << res << TestLog::EndMessage;
893                 throw tcu::TestError("Comparison failed");
894             }
895         }
896 
897         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
898         return STOP;
899     }
900 
901 private:
902     const tcu::IVec3 m_workSize;
903 };
904 
905 class BasicSharedVarCase : public TestCase
906 {
907 public:
BasicSharedVarCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)908     BasicSharedVarCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize,
909                        const tcu::IVec3 &workSize)
910         : TestCase(context, name, description)
911         , m_localSize(localSize)
912         , m_workSize(workSize)
913     {
914     }
915 
iterate(void)916     IterateResult iterate(void)
917     {
918         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
919         const Buffer outputBuffer(m_context.getRenderContext());
920         const int workGroupSize  = m_localSize[0] * m_localSize[1] * m_localSize[2];
921         const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2];
922         const int numValues      = workGroupSize * workGroupCount;
923 
924         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
925         std::ostringstream src;
926 
927         src << getGLSLVersionDeclaration(glslVersion) << "\n"
928             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
929             << ", local_size_z = " << m_localSize[2] << ") in;\n"
930             << "layout(binding = 0) buffer Output {\n"
931             << "    uint values[" << numValues << "];\n"
932             << "} sb_out;\n\n"
933             << "shared uint offsets[" << workGroupSize << "];\n\n"
934             << "void main (void) {\n"
935             << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
936             << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
937                "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
938             << "    uint globalOffs = localSize*globalNdx;\n"
939             << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
940                "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
941             << "\n"
942             << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
943             << "    barrier();\n"
944             << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
945             << "}\n";
946 
947         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
948                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
949 
950         m_testCtx.getLog() << program;
951         if (!program.isOk())
952             TCU_FAIL("Compile failed");
953 
954         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
955 
956         gl.useProgram(program.getProgram());
957 
958         // Output buffer setup
959         {
960             const uint32_t blockIndex =
961                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
962             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
963                                                         GL_BUFFER_DATA_SIZE);
964 
965             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
966             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
967             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
968             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
969         }
970 
971         // Dispatch compute workload
972         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
973         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
974 
975         // Read back and compare
976         {
977             const uint32_t blockIndex =
978                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
979             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
980                                                         GL_BUFFER_DATA_SIZE);
981             const uint32_t valueIndex =
982                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
983             const InterfaceVariableInfo valueInfo =
984                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
985             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
986 
987             for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
988             {
989                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
990                 {
991                     const int globalOffs = groupNdx * workGroupSize;
992                     const uint32_t res   = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
993                                                               valueInfo.arrayStride * (globalOffs + localOffs)));
994                     const uint32_t ref =
995                         (uint32_t)(globalOffs + (workGroupSize - localOffs - 1) * (workGroupSize - localOffs - 1));
996 
997                     if (res != ref)
998                         throw tcu::TestError(string("Comparison failed for Output.values[") +
999                                              de::toString(globalOffs + localOffs) + "]");
1000                 }
1001             }
1002         }
1003 
1004         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1005         return STOP;
1006     }
1007 
1008 private:
1009     const tcu::IVec3 m_localSize;
1010     const tcu::IVec3 m_workSize;
1011 };
1012 
1013 class SharedVarAtomicOpCase : public TestCase
1014 {
1015 public:
SharedVarAtomicOpCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1016     SharedVarAtomicOpCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize,
1017                           const tcu::IVec3 &workSize)
1018         : TestCase(context, name, description)
1019         , m_localSize(localSize)
1020         , m_workSize(workSize)
1021     {
1022     }
1023 
iterate(void)1024     IterateResult iterate(void)
1025     {
1026         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1027         const Buffer outputBuffer(m_context.getRenderContext());
1028         const int workGroupSize  = m_localSize[0] * m_localSize[1] * m_localSize[2];
1029         const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2];
1030         const int numValues      = workGroupSize * workGroupCount;
1031 
1032         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1033         std::ostringstream src;
1034 
1035         src << getGLSLVersionDeclaration(glslVersion) << "\n"
1036             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
1037             << ", local_size_z = " << m_localSize[2] << ") in;\n"
1038             << "layout(binding = 0) buffer Output {\n"
1039             << "    uint values[" << numValues << "];\n"
1040             << "} sb_out;\n\n"
1041             << "shared uint count;\n\n"
1042             << "void main (void) {\n"
1043             << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1044             << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
1045                "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1046             << "    uint globalOffs = localSize*globalNdx;\n"
1047             << "\n"
1048             << "    count = 0u;\n"
1049             << "    barrier();\n"
1050             << "    uint oldVal = atomicAdd(count, 1u);\n"
1051             << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
1052             << "}\n";
1053 
1054         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
1055                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1056 
1057         m_testCtx.getLog() << program;
1058         if (!program.isOk())
1059             TCU_FAIL("Compile failed");
1060 
1061         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1062 
1063         gl.useProgram(program.getProgram());
1064 
1065         // Output buffer setup
1066         {
1067             const uint32_t blockIndex =
1068                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1069             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1070                                                         GL_BUFFER_DATA_SIZE);
1071 
1072             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1073             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1074             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1075             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1076         }
1077 
1078         // Dispatch compute workload
1079         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1080         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1081 
1082         // Read back and compare
1083         {
1084             const uint32_t blockIndex =
1085                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1086             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1087                                                         GL_BUFFER_DATA_SIZE);
1088             const uint32_t valueIndex =
1089                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1090             const InterfaceVariableInfo valueInfo =
1091                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1092             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1093 
1094             for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
1095             {
1096                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
1097                 {
1098                     const int globalOffs = groupNdx * workGroupSize;
1099                     const uint32_t res   = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
1100                                                               valueInfo.arrayStride * (globalOffs + localOffs)));
1101                     const uint32_t ref   = (uint32_t)(localOffs + 1);
1102 
1103                     if (res != ref)
1104                         throw tcu::TestError(string("Comparison failed for Output.values[") +
1105                                              de::toString(globalOffs + localOffs) + "]");
1106                 }
1107             }
1108         }
1109 
1110         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1111         return STOP;
1112     }
1113 
1114 private:
1115     const tcu::IVec3 m_localSize;
1116     const tcu::IVec3 m_workSize;
1117 };
1118 
1119 class CopyImageToSSBOCase : public TestCase
1120 {
1121 public:
CopyImageToSSBOCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)1122     CopyImageToSSBOCase(Context &context, const char *name, const char *description, const tcu::IVec2 &localSize,
1123                         const tcu::IVec2 &imageSize)
1124         : TestCase(context, name, description)
1125         , m_localSize(localSize)
1126         , m_imageSize(imageSize)
1127     {
1128         DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1129         DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1130     }
1131 
iterate(void)1132     IterateResult iterate(void)
1133     {
1134         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1135         std::ostringstream src;
1136 
1137         src << getGLSLVersionDeclaration(glslVersion) << "\n"
1138             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1139             << "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
1140             << "layout(binding = 0) buffer Output {\n"
1141             << "    uint values[" << (m_imageSize[0] * m_imageSize[1]) << "];\n"
1142             << "} sb_out;\n\n"
1143             << "void main (void) {\n"
1144             << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1145             << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
1146             << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
1147             << "}\n";
1148 
1149         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1150         const Buffer outputBuffer(m_context.getRenderContext());
1151         const Texture inputTexture(m_context.getRenderContext());
1152         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
1153                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1154         const tcu::IVec2 workSize = m_imageSize / m_localSize;
1155         de::Random rnd(0xab2c7);
1156         vector<uint32_t> inputValues(m_imageSize[0] * m_imageSize[1]);
1157 
1158         m_testCtx.getLog() << program;
1159         if (!program.isOk())
1160             TCU_FAIL("Compile failed");
1161 
1162         m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1163 
1164         gl.useProgram(program.getProgram());
1165 
1166         // Input values
1167         for (vector<uint32_t>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1168             *i = rnd.getUint32();
1169 
1170         // Input image setup
1171         gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
1172         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1173         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT,
1174                          &inputValues[0]);
1175         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1176         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1177         GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1178 
1179         // Bind to unit 1
1180         gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
1181         GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1182 
1183         // Output buffer setup
1184         {
1185             const uint32_t blockIndex =
1186                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1187             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1188                                                         GL_BUFFER_DATA_SIZE);
1189 
1190             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1191             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1192             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1193             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1194         }
1195 
1196         // Dispatch compute workload
1197         gl.dispatchCompute(workSize[0], workSize[1], 1);
1198         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1199 
1200         // Read back and compare
1201         {
1202             const uint32_t blockIndex =
1203                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1204             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1205                                                         GL_BUFFER_DATA_SIZE);
1206             const uint32_t valueIndex =
1207                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1208             const InterfaceVariableInfo valueInfo =
1209                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1210             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1211 
1212             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
1213 
1214             for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++)
1215             {
1216                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
1217                                                           valueInfo.arrayStride * ndx));
1218                 const uint32_t ref = inputValues[ndx];
1219 
1220                 if (res != ref)
1221                     throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
1222             }
1223         }
1224 
1225         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1226         return STOP;
1227     }
1228 
1229 private:
1230     const tcu::IVec2 m_localSize;
1231     const tcu::IVec2 m_imageSize;
1232 };
1233 
1234 class CopySSBOToImageCase : public TestCase
1235 {
1236 public:
CopySSBOToImageCase(Context & context,const char * name,const char * description,const tcu::IVec2 & localSize,const tcu::IVec2 & imageSize)1237     CopySSBOToImageCase(Context &context, const char *name, const char *description, const tcu::IVec2 &localSize,
1238                         const tcu::IVec2 &imageSize)
1239         : TestCase(context, name, description)
1240         , m_localSize(localSize)
1241         , m_imageSize(imageSize)
1242     {
1243         DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1244         DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1245     }
1246 
iterate(void)1247     IterateResult iterate(void)
1248     {
1249         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1250         std::ostringstream src;
1251 
1252         src << getGLSLVersionDeclaration(glslVersion) << "\n"
1253             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1254             << "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
1255             << "buffer Input {\n"
1256             << "    uint values[" << (m_imageSize[0] * m_imageSize[1]) << "];\n"
1257             << "} sb_in;\n\n"
1258             << "void main (void) {\n"
1259             << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1260             << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1261             << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
1262             << "}\n";
1263 
1264         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1265         const Buffer inputBuffer(m_context.getRenderContext());
1266         const Texture outputTexture(m_context.getRenderContext());
1267         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
1268                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1269         const tcu::IVec2 workSize = m_imageSize / m_localSize;
1270         de::Random rnd(0x77238ac2);
1271         vector<uint32_t> inputValues(m_imageSize[0] * m_imageSize[1]);
1272 
1273         m_testCtx.getLog() << program;
1274         if (!program.isOk())
1275             TCU_FAIL("Compile failed");
1276 
1277         m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1278 
1279         gl.useProgram(program.getProgram());
1280 
1281         // Input values
1282         for (vector<uint32_t>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1283             *i = rnd.getUint32();
1284 
1285         // Input buffer setup
1286         {
1287             const uint32_t blockIndex =
1288                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1289             const InterfaceBlockInfo blockInfo =
1290                 getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1291             const uint32_t valueIndex =
1292                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1293             const InterfaceVariableInfo valueInfo =
1294                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1295 
1296             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1297             gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1298 
1299             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
1300 
1301             {
1302                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1303 
1304                 for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
1305                     *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) =
1306                         inputValues[ndx];
1307             }
1308 
1309             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1310             GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1311         }
1312 
1313         // Output image setup
1314         gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1315         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1316         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1317         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1318         GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1319 
1320         // Bind to unit 1
1321         gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1322         GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1323 
1324         // Dispatch compute workload
1325         gl.dispatchCompute(workSize[0], workSize[1], 1);
1326         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1327 
1328         // Read back and compare
1329         {
1330             Framebuffer fbo(m_context.getRenderContext());
1331             vector<uint32_t> pixels(inputValues.size() * 4);
1332 
1333             gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1334             gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1335             TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1336 
1337             // \note In ES3 we have to use GL_RGBA_INTEGER
1338             gl.readBuffer(GL_COLOR_ATTACHMENT0);
1339             gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1340             GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1341 
1342             for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
1343             {
1344                 const uint32_t res = pixels[ndx * 4];
1345                 const uint32_t ref = inputValues[ndx];
1346 
1347                 if (res != ref)
1348                     throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
1349             }
1350         }
1351 
1352         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1353         return STOP;
1354     }
1355 
1356 private:
1357     const tcu::IVec2 m_localSize;
1358     const tcu::IVec2 m_imageSize;
1359 };
1360 
1361 class ImageAtomicOpCase : public TestCase
1362 {
1363 public:
ImageAtomicOpCase(Context & context,const char * name,const char * description,int localSize,const tcu::IVec2 & imageSize)1364     ImageAtomicOpCase(Context &context, const char *name, const char *description, int localSize,
1365                       const tcu::IVec2 &imageSize)
1366         : TestCase(context, name, description)
1367         , m_localSize(localSize)
1368         , m_imageSize(imageSize)
1369     {
1370     }
1371 
init(void)1372     void init(void)
1373     {
1374         auto contextType = m_context.getRenderContext().getType();
1375         if (!glu::contextSupports(contextType, glu::ApiType::es(3, 2)) &&
1376             !glu::contextSupports(contextType, glu::ApiType::core(4, 5)) &&
1377             !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1378             TCU_THROW(NotSupportedError, "Test requires OES_shader_image_atomic extension");
1379     }
1380 
iterate(void)1381     IterateResult iterate(void)
1382     {
1383         glu::ContextType contextType  = m_context.getRenderContext().getType();
1384         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(contextType);
1385         const bool supportsES32orGL45 = glu::contextSupports(contextType, glu::ApiType::es(3, 2)) ||
1386                                         glu::contextSupports(contextType, glu::ApiType::core(4, 5));
1387         std::ostringstream src;
1388 
1389         src << getGLSLVersionDeclaration(glslVersion) << "\n"
1390             << (supportsES32orGL45 ? "\n" : "#extension GL_OES_shader_image_atomic : require\n")
1391             << "layout (local_size_x = " << m_localSize << ") in;\n"
1392             << "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
1393             << "buffer Input {\n"
1394             << "    uint values[" << (m_imageSize[0] * m_imageSize[1] * m_localSize) << "];\n"
1395             << "} sb_in;\n\n"
1396             << "void main (void) {\n"
1397             << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1398             << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1399             << "\n"
1400             << "    if (gl_LocalInvocationIndex == 0u)\n"
1401             << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1402             << "    barrier();\n"
1403             << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1404             << "}\n";
1405 
1406         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1407         const Buffer inputBuffer(m_context.getRenderContext());
1408         const Texture outputTexture(m_context.getRenderContext());
1409         const ShaderProgram program(m_context.getRenderContext(), ProgramSources()
1410                                                                       << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1411         de::Random rnd(0x77238ac2);
1412         vector<uint32_t> inputValues(m_imageSize[0] * m_imageSize[1] * m_localSize);
1413 
1414         m_testCtx.getLog() << program;
1415         if (!program.isOk())
1416             TCU_FAIL("Compile failed");
1417 
1418         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
1419 
1420         gl.useProgram(program.getProgram());
1421 
1422         // Input values
1423         for (vector<uint32_t>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1424             *i = rnd.getUint32();
1425 
1426         // Input buffer setup
1427         {
1428             const uint32_t blockIndex =
1429                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1430             const InterfaceBlockInfo blockInfo =
1431                 getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1432             const uint32_t valueIndex =
1433                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1434             const InterfaceVariableInfo valueInfo =
1435                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1436 
1437             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1438             gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1439 
1440             TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size());
1441 
1442             {
1443                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1444 
1445                 for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++)
1446                     *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) =
1447                         inputValues[ndx];
1448             }
1449 
1450             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1451             GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1452         }
1453 
1454         // Output image setup
1455         gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1456         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1457         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1458         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1459         GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1460 
1461         // Bind to unit 1
1462         gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1463         GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1464 
1465         // Dispatch compute workload
1466         gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
1467         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1468 
1469         // Read back and compare
1470         {
1471             Framebuffer fbo(m_context.getRenderContext());
1472             vector<uint32_t> pixels(m_imageSize[0] * m_imageSize[1] * 4);
1473 
1474             gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1475             gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1476             TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1477 
1478             // \note In ES3 we have to use GL_RGBA_INTEGER
1479             gl.readBuffer(GL_COLOR_ATTACHMENT0);
1480             gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1481             GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1482 
1483             for (int pixelNdx = 0; pixelNdx < (int)inputValues.size() / m_localSize; pixelNdx++)
1484             {
1485                 const uint32_t res = pixels[pixelNdx * 4];
1486                 uint32_t ref       = 0;
1487 
1488                 for (int offs = 0; offs < m_localSize; offs++)
1489                     ref += inputValues[pixelNdx * m_localSize + offs];
1490 
1491                 if (res != ref)
1492                     throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
1493             }
1494         }
1495 
1496         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1497         return STOP;
1498     }
1499 
1500 private:
1501     const int m_localSize;
1502     const tcu::IVec2 m_imageSize;
1503 };
1504 
1505 class ImageBarrierCase : public TestCase
1506 {
1507 public:
ImageBarrierCase(Context & context,const char * name,const char * description,const tcu::IVec2 & workSize)1508     ImageBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec2 &workSize)
1509         : TestCase(context, name, description)
1510         , m_workSize(workSize)
1511     {
1512     }
1513 
iterate(void)1514     IterateResult iterate(void)
1515     {
1516         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1517         const char *const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion);
1518 
1519         std::ostringstream src0;
1520         src0 << glslVersionDeclaration << "\n"
1521              << "layout (local_size_x = 1) in;\n"
1522                 "uniform uint u_baseVal;\n"
1523                 "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
1524                 "void main (void) {\n"
1525                 "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
1526                 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1527                 "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
1528                 "}\n";
1529 
1530         std::ostringstream src1;
1531         src1 << glslVersionDeclaration << "\n"
1532              << "layout (local_size_x = 1) in;\n"
1533                 "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
1534                 "layout(binding = 0) buffer Output {\n"
1535                 "    coherent uint sum;\n"
1536                 "};\n"
1537                 "void main (void) {\n"
1538                 "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
1539                 "    atomicAdd(sum, value);\n"
1540                 "}\n";
1541 
1542         const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
1543         const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
1544 
1545         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1546         const Texture tempTexture(m_context.getRenderContext());
1547         const Buffer outputBuffer(m_context.getRenderContext());
1548         const uint32_t baseValue = 127;
1549 
1550         m_testCtx.getLog() << program0 << program1;
1551         if (!program0.isOk() || !program1.isOk())
1552             TCU_FAIL("Compile failed");
1553 
1554         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1555 
1556         // Temp texture setup
1557         gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
1558         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
1559         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1560         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1561         GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1562 
1563         // Bind to unit 2
1564         gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1565         GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1566 
1567         // Output buffer setup
1568         {
1569             const uint32_t blockIndex =
1570                 gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1571             const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1572                                                         GL_BUFFER_DATA_SIZE);
1573 
1574             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1575             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1576 
1577             {
1578                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
1579                 deMemset(bufMap.getPtr(), 0, blockSize);
1580             }
1581 
1582             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1583             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1584         }
1585 
1586         // Dispatch compute workload
1587         gl.useProgram(program0.getProgram());
1588         gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
1589         gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1590         gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1591         gl.useProgram(program1.getProgram());
1592         gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1593         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
1594 
1595         // Read back and compare
1596         {
1597             const uint32_t blockIndex =
1598                 gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1599             const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1600                                                         GL_BUFFER_DATA_SIZE);
1601             const uint32_t valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
1602             const InterfaceVariableInfo valueInfo =
1603                 getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1604             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1605 
1606             const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset));
1607             uint32_t ref       = 0;
1608 
1609             for (int ndx = 0; ndx < m_workSize[0] * m_workSize[1]; ndx++)
1610                 ref += baseValue + (uint32_t)ndx;
1611 
1612             if (res != ref)
1613             {
1614                 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got "
1615                                    << res << TestLog::EndMessage;
1616                 throw tcu::TestError("Comparison failed");
1617             }
1618         }
1619 
1620         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1621         return STOP;
1622     }
1623 
1624 private:
1625     const tcu::IVec2 m_workSize;
1626 };
1627 
1628 class AtomicCounterCase : public TestCase
1629 {
1630 public:
AtomicCounterCase(Context & context,const char * name,const char * description,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize)1631     AtomicCounterCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize,
1632                       const tcu::IVec3 &workSize)
1633         : TestCase(context, name, description)
1634         , m_localSize(localSize)
1635         , m_workSize(workSize)
1636     {
1637     }
1638 
iterate(void)1639     IterateResult iterate(void)
1640     {
1641         const glw::Functions &gl = m_context.getRenderContext().getFunctions();
1642         const Buffer outputBuffer(m_context.getRenderContext());
1643         const Buffer counterBuffer(m_context.getRenderContext());
1644         const int workGroupSize  = m_localSize[0] * m_localSize[1] * m_localSize[2];
1645         const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2];
1646         const int numValues      = workGroupSize * workGroupCount;
1647 
1648         const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1649         std::ostringstream src;
1650 
1651         src << getGLSLVersionDeclaration(glslVersion) << "\n"
1652             << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1]
1653             << ", local_size_z = " << m_localSize[2] << ") in;\n"
1654             << "layout(binding = 0) buffer Output {\n"
1655             << "    uint values[" << numValues << "];\n"
1656             << "} sb_out;\n\n"
1657             << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
1658             << "void main (void) {\n"
1659             << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1660             << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
1661                "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1662             << "    uint globalOffs = localSize*globalNdx;\n"
1663             << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
1664                "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
1665             << "\n"
1666             << "    uint oldVal = atomicCounterIncrement(u_count);\n"
1667             << "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
1668             << "}\n";
1669 
1670         const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1671 
1672         m_testCtx.getLog() << program;
1673         if (!program.isOk())
1674             TCU_FAIL("Compile failed");
1675 
1676         m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1677 
1678         gl.useProgram(program.getProgram());
1679 
1680         // Atomic counter buffer setup
1681         {
1682             const uint32_t uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1683             const uint32_t bufferIndex  = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex,
1684                                                                  GL_ATOMIC_COUNTER_BUFFER_INDEX);
1685             const uint32_t bufferSize   = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER,
1686                                                                  bufferIndex, GL_BUFFER_DATA_SIZE);
1687 
1688             gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
1689             gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
1690 
1691             {
1692                 const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
1693                 deMemset(memMap.getPtr(), 0, (int)bufferSize);
1694             }
1695 
1696             gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
1697             GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
1698         }
1699 
1700         // Output buffer setup
1701         {
1702             const uint32_t blockIndex =
1703                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1704             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1705                                                         GL_BUFFER_DATA_SIZE);
1706 
1707             gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1708             gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1709             gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1710             GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1711         }
1712 
1713         // Dispatch compute workload
1714         gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1715         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1716 
1717         // Read back and compare atomic counter
1718         {
1719             const uint32_t uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1720             const uint32_t uniformOffset =
1721                 getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
1722             const uint32_t bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex,
1723                                                                 GL_ATOMIC_COUNTER_BUFFER_INDEX);
1724             const uint32_t bufferSize  = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER,
1725                                                                 bufferIndex, GL_BUFFER_DATA_SIZE);
1726             const BufferMemMap bufMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
1727 
1728             const uint32_t resVal = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + uniformOffset));
1729 
1730             if (resVal != (uint32_t)numValues)
1731                 throw tcu::TestError("Invalid atomic counter value");
1732         }
1733 
1734         // Read back and compare SSBO
1735         {
1736             const uint32_t blockIndex =
1737                 gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1738             const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex,
1739                                                         GL_BUFFER_DATA_SIZE);
1740             const uint32_t valueIndex =
1741                 gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1742             const InterfaceVariableInfo valueInfo =
1743                 getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1744             const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1745             uint32_t valSum = 0;
1746             uint32_t refSum = 0;
1747 
1748             for (int valNdx = 0; valNdx < numValues; valNdx++)
1749             {
1750                 const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
1751                                                           valueInfo.arrayStride * valNdx));
1752 
1753                 valSum += res;
1754                 refSum += (uint32_t)valNdx;
1755 
1756                 if (!de::inBounds<uint32_t>(res, 0, (uint32_t)numValues))
1757                     throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
1758             }
1759 
1760             if (valSum != refSum)
1761                 throw tcu::TestError("Total sum of values in Output.values doesn't match");
1762         }
1763 
1764         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1765         return STOP;
1766     }
1767 
1768 private:
1769     const tcu::IVec3 m_localSize;
1770     const tcu::IVec3 m_workSize;
1771 };
1772 
1773 } // namespace
1774 
BasicComputeShaderTests(Context & context)1775 BasicComputeShaderTests::BasicComputeShaderTests(Context &context)
1776     : TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
1777 {
1778 }
1779 
~BasicComputeShaderTests(void)1780 BasicComputeShaderTests::~BasicComputeShaderTests(void)
1781 {
1782 }
1783 
init(void)1784 void BasicComputeShaderTests::init(void)
1785 {
1786     addChild(new EmptyComputeShaderCase(m_context));
1787 
1788     addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_single_invocation",
1789                                      "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1, 1, 1),
1790                                      tcu::IVec3(1, 1, 1)));
1791     addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits",
1792                                      1024, tcu::IVec3(2, 1, 4), tcu::IVec3(1, 1, 1)));
1793     addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_multiple_invocations",
1794                                      "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1, 1, 1),
1795                                      tcu::IVec3(2, 4, 1)));
1796     addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits",
1797                                      1024, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
1798 
1799     addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256,
1800                                     tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1801     addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits",
1802                                     1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1)));
1803     addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024,
1804                                     tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
1805 
1806     addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true,
1807                                        tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1808     addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true,
1809                                        tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
1810 
1811     addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256,
1812                                        false, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1813     addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024,
1814                                        false, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
1815 
1816     addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation", "Write to multiple SSBOs",
1817                                          256, true, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1818     addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs",
1819                                          1024, true, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
1820 
1821     addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",
1822                                          "Write to multiple SSBOs", 256, false, tcu::IVec3(1, 1, 1),
1823                                          tcu::IVec3(1, 1, 1)));
1824     addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",
1825                                          "Write to multiple SSBOs", 1024, false, tcu::IVec3(1, 4, 2),
1826                                          tcu::IVec3(2, 2, 4)));
1827 
1828     addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",
1829                                       tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1830     addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_single_group", "SSBO local barrier usage",
1831                                       tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
1832     addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage",
1833                                       tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
1834 
1835     addChild(
1836         new SSBOBarrierCase(m_context, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1, 1, 1)));
1837     addChild(
1838         new SSBOBarrierCase(m_context, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11, 5, 7)));
1839 
1840     addChild(new BasicSharedVarCase(m_context, "shared_var_single_invocation", "Basic shared variable usage",
1841                                     tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1842     addChild(new BasicSharedVarCase(m_context, "shared_var_single_group", "Basic shared variable usage",
1843                                     tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
1844     addChild(new BasicSharedVarCase(m_context, "shared_var_multiple_invocations", "Basic shared variable usage",
1845                                     tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4)));
1846     addChild(new BasicSharedVarCase(m_context, "shared_var_multiple_groups", "Basic shared variable usage",
1847                                     tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
1848 
1849     addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_single_invocation",
1850                                        "Atomic operation with shared var", tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1851     addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_single_group", "Atomic operation with shared var",
1852                                        tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
1853     addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_multiple_invocations",
1854                                        "Atomic operation with shared var", tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4)));
1855     addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_multiple_groups",
1856                                        "Atomic operation with shared var", tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
1857 
1858     addChild(new CopyImageToSSBOCase(m_context, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1, 1),
1859                                      tcu::IVec2(64, 64)));
1860     addChild(new CopyImageToSSBOCase(m_context, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2, 4),
1861                                      tcu::IVec2(512, 512)));
1862 
1863     addChild(new CopySSBOToImageCase(m_context, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1),
1864                                      tcu::IVec2(64, 64)));
1865     addChild(new CopySSBOToImageCase(m_context, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4),
1866                                      tcu::IVec2(512, 512)));
1867 
1868     addChild(new ImageAtomicOpCase(m_context, "image_atomic_op_local_size_1", "Atomic operation with image", 1,
1869                                    tcu::IVec2(64, 64)));
1870     addChild(new ImageAtomicOpCase(m_context, "image_atomic_op_local_size_8", "Atomic operation with image", 8,
1871                                    tcu::IVec2(64, 64)));
1872 
1873     addChild(new ImageBarrierCase(m_context, "image_barrier_single", "Image barrier", tcu::IVec2(1, 1)));
1874     addChild(new ImageBarrierCase(m_context, "image_barrier_multiple", "Image barrier", tcu::IVec2(64, 64)));
1875 
1876     addChild(new AtomicCounterCase(m_context, "atomic_counter_single_invocation", "Basic atomic counter test",
1877                                    tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
1878     addChild(new AtomicCounterCase(m_context, "atomic_counter_single_group", "Basic atomic counter test",
1879                                    tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
1880     addChild(new AtomicCounterCase(m_context, "atomic_counter_multiple_invocations", "Basic atomic counter test",
1881                                    tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4)));
1882     addChild(new AtomicCounterCase(m_context, "atomic_counter_multiple_groups", "Basic atomic counter test",
1883                                    tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
1884 }
1885 
1886 } // namespace Functional
1887 } // namespace gles31
1888 } // namespace deqp
1889