1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Indirect compute dispatch tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es31fIndirectComputeDispatchTests.hpp"
25 #include "gluObjectWrapper.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "glwFunctions.hpp"
29 #include "glwEnums.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deStringUtil.hpp"
34
35 #include <vector>
36 #include <string>
37 #include <map>
38
39 namespace deqp
40 {
41 namespace gles31
42 {
43 namespace Functional
44 {
45
46 using std::map;
47 using std::string;
48 using std::vector;
49 using tcu::TestLog;
50 using tcu::UVec3;
51
// \todo [2014-02-17 pyry] Should be extended with the following:
53
54 // Negative:
55 // - no active shader program
56 // - indirect negative or not aligned
57 // - indirect + size outside buffer bounds
58 // - no buffer bound to DRAW_INDIRECT_BUFFER
// - (implicit) buffer mapped
60
61 // Robustness:
62 // - lot of small work group launches
63 // - very large work group size
64 // - no synchronization, touched by gpu
// - compute program overwriting buffer
66
67 namespace
68 {
69
// Byte layout constants shared by the host code and the "Result" storage block
// of the verification shader, plus the size of one indirect dispatch command.
enum
{
    // Offsets of the two members inside a single result block.
    RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0,                         // uvec3 expectedGroupCount
    RESULT_BLOCK_NUM_PASSED_OFFSET     = 3 * (int)sizeof(uint32_t), // uint numPassed

    // Unpadded size of one result block: the uvec3 followed by the uint.
    RESULT_BLOCK_BASE_SIZE = RESULT_BLOCK_NUM_PASSED_OFFSET + (int)sizeof(uint32_t),

    // An indirect command consists of three consecutive uints (x, y, z group counts).
    INDIRECT_COMMAND_SIZE = 3 * (int)sizeof(uint32_t)
};
78
// Selects how the contents of the indirect command buffer are produced.
enum GenBuffer
{
    GEN_BUFFER_UPLOAD  = 0, // Commands are assembled on the host and uploaded with glBufferData().
    GEN_BUFFER_COMPUTE = 1, // Commands are written into the buffer by a compute shader.

    GEN_BUFFER_LAST = 2 // Number of generation modes.
};
86
genVerifySources(const UVec3 & workGroupSize)87 glu::ProgramSources genVerifySources(const UVec3 &workGroupSize)
88 {
89 static const char *s_verifyDispatchTmpl =
90 "#version 310 es\n"
91 "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
92 "layout(binding = 0, std430) buffer Result\n"
93 "{\n"
94 " uvec3 expectedGroupCount;\n"
95 " coherent uint numPassed;\n"
96 "} result;\n"
97 "void main (void)\n"
98 "{\n"
99 " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
100 " atomicAdd(result.numPassed, 1u);\n"
101 "}\n";
102
103 map<string, string> args;
104
105 args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x());
106 args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y());
107 args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z());
108
109 return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args));
110 }
111
112 class IndirectDispatchCase : public TestCase
113 {
114 public:
115 IndirectDispatchCase(Context &context, const char *name, const char *description, GenBuffer genBuffer);
116 ~IndirectDispatchCase(void);
117
118 IterateResult iterate(void);
119
120 protected:
121 struct DispatchCommand
122 {
123 intptr_t offset;
124 UVec3 numWorkGroups;
125
DispatchCommanddeqp::gles31::Functional::__anonacebb5ad0111::IndirectDispatchCase::DispatchCommand126 DispatchCommand(void) : offset(0)
127 {
128 }
DispatchCommanddeqp::gles31::Functional::__anonacebb5ad0111::IndirectDispatchCase::DispatchCommand129 DispatchCommand(intptr_t offset_, const UVec3 &numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_)
130 {
131 }
132 };
133
134 GenBuffer m_genBuffer;
135 uintptr_t m_bufferSize;
136 UVec3 m_workGroupSize;
137 vector<DispatchCommand> m_commands;
138
139 void createCommandBuffer(uint32_t buffer) const;
140 void createResultBuffer(uint32_t buffer) const;
141
142 bool verifyResultBuffer(uint32_t buffer);
143
144 void createCmdBufferUpload(uint32_t buffer) const;
145 void createCmdBufferCompute(uint32_t buffer) const;
146
147 private:
148 IndirectDispatchCase(const IndirectDispatchCase &);
149 IndirectDispatchCase &operator=(const IndirectDispatchCase &);
150 };
151
IndirectDispatchCase(Context & context,const char * name,const char * description,GenBuffer genBuffer)152 IndirectDispatchCase::IndirectDispatchCase(Context &context, const char *name, const char *description,
153 GenBuffer genBuffer)
154 : TestCase(context, name, description)
155 , m_genBuffer(genBuffer)
156 , m_bufferSize(0)
157 {
158 }
159
~IndirectDispatchCase(void)160 IndirectDispatchCase::~IndirectDispatchCase(void)
161 {
162 }
163
getResultBlockAlignedSize(const glw::Functions & gl)164 static int getResultBlockAlignedSize(const glw::Functions &gl)
165 {
166 const int baseSize = RESULT_BLOCK_BASE_SIZE;
167 int alignment = 0;
168 gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment);
169
170 if (alignment == 0 || (baseSize % alignment == 0))
171 return baseSize;
172 else
173 return (baseSize / alignment + 1) * alignment;
174 }
175
createCommandBuffer(uint32_t buffer) const176 void IndirectDispatchCase::createCommandBuffer(uint32_t buffer) const
177 {
178 switch (m_genBuffer)
179 {
180 case GEN_BUFFER_UPLOAD:
181 createCmdBufferUpload(buffer);
182 break;
183 case GEN_BUFFER_COMPUTE:
184 createCmdBufferCompute(buffer);
185 break;
186 default:
187 DE_ASSERT(false);
188 }
189 }
190
createCmdBufferUpload(uint32_t buffer) const191 void IndirectDispatchCase::createCmdBufferUpload(uint32_t buffer) const
192 {
193 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
194 vector<uint8_t> data(m_bufferSize);
195
196 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
197 {
198 DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(uint32_t) * 3);
199 DE_ASSERT(cmdIter->offset >= 0);
200 DE_ASSERT(cmdIter->offset % sizeof(uint32_t) == 0);
201 DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (intptr_t)m_bufferSize);
202
203 uint32_t *const dstPtr = (uint32_t *)&data[cmdIter->offset];
204
205 dstPtr[0] = cmdIter->numWorkGroups[0];
206 dstPtr[1] = cmdIter->numWorkGroups[1];
207 dstPtr[2] = cmdIter->numWorkGroups[2];
208 }
209
210 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
211 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW);
212 }
213
createCmdBufferCompute(uint32_t buffer) const214 void IndirectDispatchCase::createCmdBufferCompute(uint32_t buffer) const
215 {
216 std::ostringstream src;
217
218 // Header
219 src << "#version 310 es\n"
220 "layout(local_size_x = 1) in;\n"
221 "layout(std430, binding = 1) buffer Out\n"
222 "{\n"
223 " highp uint data[];\n"
224 "};\n"
225 "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
226 "{\n"
227 " data[offset+0u] = numWorkGroups.x;\n"
228 " data[offset+1u] = numWorkGroups.y;\n"
229 " data[offset+2u] = numWorkGroups.z;\n"
230 "}\n"
231 "void main (void)\n"
232 "{\n";
233
234 // Commands
235 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
236 {
237 const uint32_t offs = (uint32_t)(cmdIter->offset / 4);
238 DE_ASSERT((intptr_t)offs * 4 == cmdIter->offset);
239
240 src << "\twriteCmd(" << offs << "u, uvec3(" << cmdIter->numWorkGroups.x() << "u, " << cmdIter->numWorkGroups.y()
241 << "u, " << cmdIter->numWorkGroups.z() << "u));\n";
242 }
243
244 src << "}\n";
245
246 {
247 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
248 glu::ShaderProgram program(m_context.getRenderContext(), glu::ProgramSources()
249 << glu::ComputeSource(src.str()));
250
251 m_testCtx.getLog() << program;
252 if (!program.isOk())
253 TCU_FAIL("Compile failed");
254
255 gl.useProgram(program.getProgram());
256
257 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
258 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW);
259 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer);
260 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
261
262 gl.dispatchCompute(1, 1, 1);
263 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed");
264
265 gl.memoryBarrier(GL_COMMAND_BARRIER_BIT);
266 GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed");
267 }
268 }
269
createResultBuffer(uint32_t buffer) const270 void IndirectDispatchCase::createResultBuffer(uint32_t buffer) const
271 {
272 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
273 const int resultBlockSize = getResultBlockAlignedSize(gl);
274 const int resultBufferSize = resultBlockSize * (int)m_commands.size();
275 vector<uint8_t> data(resultBufferSize);
276
277 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
278 {
279 uint8_t *const dstPtr = &data[resultBlockSize * cmdNdx];
280
281 *(uint32_t *)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0 * 4) = m_commands[cmdNdx].numWorkGroups[0];
282 *(uint32_t *)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1 * 4) = m_commands[cmdNdx].numWorkGroups[1];
283 *(uint32_t *)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2 * 4) = m_commands[cmdNdx].numWorkGroups[2];
284 *(uint32_t *)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
285 }
286
287 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
288 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ);
289 }
290
computeInvocationCount(const UVec3 & workGroupSize,const UVec3 & numWorkGroups)291 uint32_t computeInvocationCount(const UVec3 &workGroupSize, const UVec3 &numWorkGroups)
292 {
293 const int numInvocationsPerGroup = workGroupSize[0] * workGroupSize[1] * workGroupSize[2];
294 const int numGroups = numWorkGroups[0] * numWorkGroups[1] * numWorkGroups[2];
295
296 return numInvocationsPerGroup * numGroups;
297 }
298
verifyResultBuffer(uint32_t buffer)299 bool IndirectDispatchCase::verifyResultBuffer(uint32_t buffer)
300 {
301 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
302
303 const int resultBlockSize = getResultBlockAlignedSize(gl);
304 const int resultBufferSize = resultBlockSize * (int)m_commands.size();
305
306 void *mapPtr = DE_NULL;
307 bool allOk = true;
308
309 try
310 {
311 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
312 mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT);
313
314 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed");
315 TCU_CHECK(mapPtr);
316
317 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
318 {
319 const DispatchCommand &cmd = m_commands[cmdNdx];
320 const uint8_t *const srcPtr = (const uint8_t *)mapPtr + cmdNdx * resultBlockSize;
321 const uint32_t numPassed = *(const uint32_t *)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
322 const uint32_t expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups);
323
324 // Verify numPassed.
325 if (numPassed != expectedCount)
326 {
327 m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
328 << ": got numPassed = " << numPassed << ", expected " << expectedCount
329 << TestLog::EndMessage;
330 allOk = false;
331 }
332 }
333 }
334 catch (...)
335 {
336 if (mapPtr)
337 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
338 }
339
340 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
341 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed");
342
343 return allOk;
344 }
345
iterate(void)346 IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate(void)
347 {
348 const glu::RenderContext &renderCtx = m_context.getRenderContext();
349 const glw::Functions &gl = renderCtx.getFunctions();
350
351 const glu::ShaderProgram program(renderCtx, genVerifySources(m_workGroupSize));
352
353 glu::Buffer cmdBuffer(renderCtx);
354 glu::Buffer resultBuffer(renderCtx);
355
356 m_testCtx.getLog() << program;
357 TCU_CHECK_MSG(program.isOk(), "Compile failed");
358
359 m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize
360 << TestLog::EndMessage;
361 {
362 tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands",
363 "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)");
364
365 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
366 m_testCtx.getLog() << TestLog::Message << cmdNdx << ": "
367 << "offset = " << m_commands[cmdNdx].offset
368 << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups << TestLog::EndMessage;
369 }
370
371 createResultBuffer(*resultBuffer);
372 createCommandBuffer(*cmdBuffer);
373
374 gl.useProgram(program.getProgram());
375 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer);
376 GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed");
377
378 {
379 const int resultBlockAlignedSize = getResultBlockAlignedSize(gl);
380 intptr_t curOffset = 0;
381
382 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end();
383 ++cmdIter)
384 {
385 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset,
386 resultBlockAlignedSize);
387 gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset);
388
389 curOffset += resultBlockAlignedSize;
390 }
391 }
392
393 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed");
394
395 if (verifyResultBuffer(*resultBuffer))
396 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
397 else
398 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
399
400 return STOP;
401 }
402
403 class SingleDispatchCase : public IndirectDispatchCase
404 {
405 public:
SingleDispatchCase(Context & context,const char * name,const char * description,GenBuffer genBuffer,uintptr_t bufferSize,uintptr_t offset,const UVec3 & workGroupSize,const UVec3 & numWorkGroups)406 SingleDispatchCase(Context &context, const char *name, const char *description, GenBuffer genBuffer,
407 uintptr_t bufferSize, uintptr_t offset, const UVec3 &workGroupSize, const UVec3 &numWorkGroups)
408 : IndirectDispatchCase(context, name, description, genBuffer)
409 {
410 m_bufferSize = bufferSize;
411 m_workGroupSize = workGroupSize;
412 m_commands.push_back(DispatchCommand(offset, numWorkGroups));
413 }
414 };
415
416 class MultiDispatchCase : public IndirectDispatchCase
417 {
418 public:
MultiDispatchCase(Context & context,GenBuffer genBuffer)419 MultiDispatchCase(Context &context, GenBuffer genBuffer)
420 : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer",
421 genBuffer)
422 {
423 m_bufferSize = 1 << 10;
424 m_workGroupSize = UVec3(3, 1, 2);
425
426 m_commands.push_back(DispatchCommand(0, UVec3(1, 1, 1)));
427 m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE, UVec3(2, 1, 1)));
428 m_commands.push_back(DispatchCommand(104, UVec3(1, 3, 1)));
429 m_commands.push_back(DispatchCommand(40, UVec3(1, 1, 7)));
430 m_commands.push_back(DispatchCommand(52, UVec3(1, 1, 4)));
431 }
432 };
433
434 class MultiDispatchReuseCommandCase : public IndirectDispatchCase
435 {
436 public:
MultiDispatchReuseCommandCase(Context & context,GenBuffer genBuffer)437 MultiDispatchReuseCommandCase(Context &context, GenBuffer genBuffer)
438 : IndirectDispatchCase(context, "multi_dispatch_reuse_command",
439 "Dispatch multiple compute commands from single buffer", genBuffer)
440 {
441 m_bufferSize = 1 << 10;
442 m_workGroupSize = UVec3(3, 1, 2);
443
444 m_commands.push_back(DispatchCommand(0, UVec3(1, 1, 1)));
445 m_commands.push_back(DispatchCommand(0, UVec3(1, 1, 1)));
446 m_commands.push_back(DispatchCommand(0, UVec3(1, 1, 1)));
447 m_commands.push_back(DispatchCommand(104, UVec3(1, 3, 1)));
448 m_commands.push_back(DispatchCommand(104, UVec3(1, 3, 1)));
449 m_commands.push_back(DispatchCommand(52, UVec3(1, 1, 4)));
450 m_commands.push_back(DispatchCommand(52, UVec3(1, 1, 4)));
451 }
452 };
453
454 } // namespace
455
IndirectComputeDispatchTests(Context & context)456 IndirectComputeDispatchTests::IndirectComputeDispatchTests(Context &context)
457 : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests")
458 {
459 }
460
~IndirectComputeDispatchTests(void)461 IndirectComputeDispatchTests::~IndirectComputeDispatchTests(void)
462 {
463 }
464
init(void)465 void IndirectComputeDispatchTests::init(void)
466 {
467 static const struct
468 {
469 const char *name;
470 GenBuffer gen;
471 } s_genBuffer[] = {{"upload_buffer", GEN_BUFFER_UPLOAD}, {"gen_in_compute", GEN_BUFFER_COMPUTE}};
472
473 static const struct
474 {
475 const char *name;
476 const char *description;
477 uintptr_t bufferSize;
478 uintptr_t offset;
479 UVec3 workGroupSize;
480 UVec3 numWorkGroups;
481 } s_singleDispatchCases[] = {
482 // Name Desc BufferSize Offs WorkGroupSize NumWorkGroups
483 {"single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1, 1, 1),
484 UVec3(1, 1, 1)},
485 {"multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1, 1, 1),
486 UVec3(2, 3, 5)},
487 {"multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE,
488 0, UVec3(2, 3, 1), UVec3(1, 2, 3)},
489 {"small_offset", "Small offset", 16 + INDIRECT_COMMAND_SIZE, 16, UVec3(1, 1, 1), UVec3(1, 1, 1)},
490 {"large_offset", "Large offset", (2 << 20), (1 << 20) + 12, UVec3(1, 1, 1), UVec3(1, 1, 1)},
491 {"large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), (1 << 20) + 12,
492 UVec3(2, 3, 1), UVec3(1, 2, 3)},
493 {"empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1, 1, 1), UVec3(0, 0, 0)},
494 };
495
496 for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++)
497 {
498 const GenBuffer genBuf = s_genBuffer[genNdx].gen;
499 tcu::TestCaseGroup *const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, "");
500 addChild(genGroup);
501
502 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++)
503 genGroup->addChild(new SingleDispatchCase(
504 m_context, s_singleDispatchCases[ndx].name, s_singleDispatchCases[ndx].description, genBuf,
505 s_singleDispatchCases[ndx].bufferSize, s_singleDispatchCases[ndx].offset,
506 s_singleDispatchCases[ndx].workGroupSize, s_singleDispatchCases[ndx].numWorkGroups));
507
508 genGroup->addChild(new MultiDispatchCase(m_context, genBuf));
509 genGroup->addChild(new MultiDispatchReuseCommandCase(m_context, genBuf));
510 }
511 }
512
513 } // namespace Functional
514 } // namespace gles31
515 } // namespace deqp
516