1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 The Android Open Source Project
7  * Copyright (c) 2023 LunarG, Inc.
8  * Copyright (c) 2023 Nintendo
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  *//*!
23  * \file
24  * \brief Compute Shader Built-in variable tests.
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktComputeShaderBuiltinVarTests.hpp"
28 #include "vktTestCaseUtil.hpp"
29 #include "vktComputeTestsUtil.hpp"
30 
31 #include "vkDefs.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkRef.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkStrUtil.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBarrierUtil.hpp"
39 #include "vkMemUtil.hpp"
40 #include "vkDeviceUtil.hpp"
41 #include "vkTypeUtil.hpp"
42 #include "vkBuilderUtil.hpp"
43 #include "vkCmdUtil.hpp"
44 #include "vkObjUtil.hpp"
45 #include "vkBufferWithMemory.hpp"
46 
47 #include "tcuTestLog.hpp"
48 #include "tcuFormatUtil.hpp"
49 #include "tcuVectorUtil.hpp"
50 #include "tcuCommandLine.hpp"
51 
52 #include "gluShaderUtil.hpp"
53 
54 #include "deUniquePtr.hpp"
55 #include "deSharedPtr.hpp"
56 
57 #include <map>
58 #include <string>
59 #include <vector>
60 
61 namespace vkt
62 {
63 namespace compute
64 {
65 namespace
66 {
67 
68 using namespace vk;
69 using std::map;
70 using std::string;
71 using std::vector;
72 using tcu::IVec3;
73 using tcu::TestLog;
74 using tcu::UVec3;
75 
// Forward declarations: the test instance keeps a pointer to its case, and the
// case creates the instance.
class ComputeBuiltinVarCase;
class ComputeBuiltinVarInstance;

// Common prefix of the shader program names; the sub-case index is appended to it.
static const string s_prefixProgramName("compute_");
80 
compareNumComponents(const UVec3 & a,const UVec3 & b,const int numComps)81 static inline bool compareNumComponents(const UVec3 &a, const UVec3 &b, const int numComps)
82 {
83     DE_ASSERT(numComps == 1 || numComps == 3);
84     return numComps == 3 ? tcu::allEqual(a, b) : a.x() == b.x();
85 }
86 
readResultVec(const uint32_t * ptr,const int numComps)87 static inline UVec3 readResultVec(const uint32_t *ptr, const int numComps)
88 {
89     UVec3 res;
90     for (int ndx = 0; ndx < numComps; ndx++)
91         res[ndx] = ptr[ndx];
92     return res;
93 }
94 
95 struct LogComps
96 {
97     const UVec3 &v;
98     int numComps;
99 
LogCompsvkt::compute::__anon8c7b201d0111::LogComps100     LogComps(const UVec3 &v_, int numComps_) : v(v_), numComps(numComps_)
101     {
102     }
103 };
104 
operator <<(std::ostream & str,const LogComps & c)105 static inline std::ostream &operator<<(std::ostream &str, const LogComps &c)
106 {
107     DE_ASSERT(c.numComps == 1 || c.numComps == 3);
108     return c.numComps == 3 ? str << c.v : str << c.v.x();
109 }
110 
111 class SubCase
112 {
113 public:
114     // Use getters instead of public const members, because SubCase must be assignable
115     // in order to be stored in a vector.
116 
localSize(void) const117     const UVec3 &localSize(void) const
118     {
119         return m_localSize;
120     }
numWorkGroups(void) const121     const UVec3 &numWorkGroups(void) const
122     {
123         return m_numWorkGroups;
124     }
125 
SubCase(void)126     SubCase(void)
127     {
128     }
SubCase(const UVec3 & localSize_,const UVec3 & numWorkGroups_)129     SubCase(const UVec3 &localSize_, const UVec3 &numWorkGroups_)
130         : m_localSize(localSize_)
131         , m_numWorkGroups(numWorkGroups_)
132     {
133     }
134 
135 private:
136     UVec3 m_localSize;
137     UVec3 m_numWorkGroups;
138 };
139 
140 class ComputeBuiltinVarInstance : public vkt::TestInstance
141 {
142 public:
143     ComputeBuiltinVarInstance(Context &context, const vector<SubCase> &subCases, const glu::DataType varType,
144                               const ComputeBuiltinVarCase *builtinVarCase,
145                               const vk::ComputePipelineConstructionType computePipelineConstructionType);
146 
147     virtual tcu::TestStatus iterate(void);
148 
149 private:
150     const VkDevice m_device;
151     const DeviceInterface &m_vki;
152     const VkQueue m_queue;
153     const uint32_t m_queueFamilyIndex;
154     vector<SubCase> m_subCases;
155     const ComputeBuiltinVarCase *m_builtin_var_case;
156     int m_subCaseNdx;
157     const glu::DataType m_varType;
158     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
159 };
160 
161 class ComputeBuiltinVarCase : public vkt::TestCase
162 {
163 public:
164     ComputeBuiltinVarCase(tcu::TestContext &context, const string &name, const char *varName, glu::DataType varType,
165                           bool readByComponent,
166                           const vk::ComputePipelineConstructionType computePipelineConstructionType);
167     ~ComputeBuiltinVarCase(void);
168 
checkSupport(Context & context) const169     virtual void checkSupport(Context &context) const
170     {
171         checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
172                                       m_computePipelineConstructionType);
173     }
createInstance(Context & context) const174     TestInstance *createInstance(Context &context) const
175     {
176         return new ComputeBuiltinVarInstance(context, m_subCases, m_varType, this, m_computePipelineConstructionType);
177     }
178 
179     virtual void initPrograms(SourceCollections &programCollection) const;
180     virtual UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
181                                    const UVec3 &localInvocationID) const = 0;
182 
183 protected:
184     string genBuiltinVarSource(const string &varName, glu::DataType varType, const UVec3 &localSize,
185                                bool readByComponent) const;
186     vector<SubCase> m_subCases;
187 
188 private:
189     uint32_t getProgram(const tcu::UVec3 &localSize);
190 
191     const string m_varName;
192     const glu::DataType m_varType;
193     int m_subCaseNdx;
194     bool m_readByComponent;
195     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
196 
197     ComputeBuiltinVarCase(const ComputeBuiltinVarCase &other);
198     ComputeBuiltinVarCase &operator=(const ComputeBuiltinVarCase &other);
199 };
200 
ComputeBuiltinVarCase(tcu::TestContext & context,const string & name,const char * varName,glu::DataType varType,bool readByComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)201 ComputeBuiltinVarCase::ComputeBuiltinVarCase(tcu::TestContext &context, const string &name, const char *varName,
202                                              glu::DataType varType, bool readByComponent,
203                                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
204     : TestCase(context, name + (readByComponent ? "_component" : ""))
205     , m_varName(varName)
206     , m_varType(varType)
207     , m_subCaseNdx(0)
208     , m_readByComponent(readByComponent)
209     , m_computePipelineConstructionType(computePipelineConstructionType)
210 {
211 }
212 
~ComputeBuiltinVarCase(void)213 ComputeBuiltinVarCase::~ComputeBuiltinVarCase(void)
214 {
215     ComputeBuiltinVarCase::deinit();
216 }
217 
initPrograms(SourceCollections & programCollection) const218 void ComputeBuiltinVarCase::initPrograms(SourceCollections &programCollection) const
219 {
220     for (std::size_t i = 0; i < m_subCases.size(); i++)
221     {
222         const SubCase &subCase = m_subCases[i];
223         std::ostringstream name;
224         name << s_prefixProgramName << i;
225         programCollection.glslSources.add(name.str()) << glu::ComputeSource(
226             genBuiltinVarSource(m_varName, m_varType, subCase.localSize(), m_readByComponent).c_str());
227     }
228 }
229 
// Generates the GLSL compute shader for one sub-case. Every invocation writes
// the built-in variable varName to sb_out.result[offset], where offset is the
// linearised global invocation ID computed from the strides in the uniform block.
//
// varName         - GLSL name of the built-in variable to read
// varType         - GLSL type of the variable and of the output array elements
// localSize       - local work group size written into the layout qualifier
// readByComponent - copy the variable component by component (ignored for TYPE_UINT)
string ComputeBuiltinVarCase::genBuiltinVarSource(const string &varName, glu::DataType varType, const UVec3 &localSize,
                                                  bool readByComponent) const
{
    std::ostringstream src;

    src << "#version 310 es\n";
    src << "layout (local_size_x = " << localSize.x() << ", local_size_y = " << localSize.y()
        << ", local_size_z = " << localSize.z() << ") in;\n";

    // For the gl_WorkGroupSize case the size is additionally declared through
    // specialization constant IDs, so that Glslang cannot simply bypass the read
    // of the builtin variable. These spec constants are never overridden.
    src << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";

    src << "layout(set = 0, binding = 0) uniform Stride\n";
    src << "{\n";
    src << "    uvec2 u_stride;\n";
    src << "}stride;\n";
    src << "layout(set = 0, binding = 1, std430) buffer Output\n";
    src << "{\n";
    src << "    " << glu::getDataTypeName(varType) << " result[];\n";
    src << "} sb_out;\n";
    src << "\n";
    src << "void main (void)\n";
    src << "{\n";
    src << "    highp uint offset = stride.u_stride.x*gl_GlobalInvocationID.z + "
           "stride.u_stride.y*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n";

    const bool perComponent = readByComponent && varType != glu::TYPE_UINT;

    if (!perComponent)
    {
        src << "    sb_out.result[offset] = " << varName << ";\n";
    }
    else
    {
        // Components are emitted from the highest one present down to x.
        const char *const componentNames = "wzyx";
        const int numComponentNames      = 4;
        int firstComponent               = numComponentNames;

        switch (varType)
        {
        case glu::TYPE_UINT_VEC4:
            firstComponent = 0;
            break;
        case glu::TYPE_UINT_VEC3:
            firstComponent = 1;
            break;
        case glu::TYPE_UINT_VEC2:
            firstComponent = 2;
            break;
        default:
            DE_FATAL("Illegal data type");
            break;
        }

        for (int compNdx = firstComponent; compNdx < numComponentNames; ++compNdx)
        {
            const char comp = componentNames[compNdx];
            src << "    sb_out.result[offset]." << comp << " = " << varName << "." << comp << ";\n";
        }
    }

    src << "}\n";

    return src.str();
}
285 
286 class NumWorkGroupsCase : public ComputeBuiltinVarCase
287 {
288 public:
NumWorkGroupsCase(tcu::TestContext & context,bool readByCompnent,const vk::ComputePipelineConstructionType computePipelineConstructionType)289     NumWorkGroupsCase(tcu::TestContext &context, bool readByCompnent,
290                       const vk::ComputePipelineConstructionType computePipelineConstructionType)
291         : ComputeBuiltinVarCase(context, "num_work_groups", "gl_NumWorkGroups", glu::TYPE_UINT_VEC3, readByCompnent,
292                                 computePipelineConstructionType)
293     {
294         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
295         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(52, 1, 1)));
296         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 39, 1)));
297         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 78)));
298         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(4, 7, 11)));
299         m_subCases.push_back(SubCase(UVec3(2, 3, 4), UVec3(4, 7, 11)));
300     }
301 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const302     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
303                            const UVec3 &localInvocationID) const
304     {
305         DE_UNREF(numWorkGroups);
306         DE_UNREF(workGroupSize);
307         DE_UNREF(workGroupID);
308         DE_UNREF(localInvocationID);
309         return numWorkGroups;
310     }
311 };
312 
313 class WorkGroupSizeCase : public ComputeBuiltinVarCase
314 {
315 public:
WorkGroupSizeCase(tcu::TestContext & context,bool readByComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)316     WorkGroupSizeCase(tcu::TestContext &context, bool readByComponent,
317                       const vk::ComputePipelineConstructionType computePipelineConstructionType)
318         : ComputeBuiltinVarCase(context, "work_group_size", "gl_WorkGroupSize", glu::TYPE_UINT_VEC3, readByComponent,
319                                 computePipelineConstructionType)
320     {
321         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
322         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(2, 7, 3)));
323         m_subCases.push_back(SubCase(UVec3(2, 1, 1), UVec3(1, 1, 1)));
324         m_subCases.push_back(SubCase(UVec3(2, 1, 1), UVec3(1, 3, 5)));
325         m_subCases.push_back(SubCase(UVec3(1, 3, 1), UVec3(1, 1, 1)));
326         m_subCases.push_back(SubCase(UVec3(1, 1, 7), UVec3(1, 1, 1)));
327         m_subCases.push_back(SubCase(UVec3(1, 1, 7), UVec3(3, 3, 1)));
328         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(1, 1, 1)));
329         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(3, 1, 2)));
330     }
331 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const332     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
333                            const UVec3 &localInvocationID) const
334     {
335         DE_UNREF(numWorkGroups);
336         DE_UNREF(workGroupID);
337         DE_UNREF(localInvocationID);
338         return workGroupSize;
339     }
340 };
341 
342 //-----------------------------------------------------------------------
343 class WorkGroupIDCase : public ComputeBuiltinVarCase
344 {
345 public:
WorkGroupIDCase(tcu::TestContext & context,bool readbyComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)346     WorkGroupIDCase(tcu::TestContext &context, bool readbyComponent,
347                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
348         : ComputeBuiltinVarCase(context, "work_group_id", "gl_WorkGroupID", glu::TYPE_UINT_VEC3, readbyComponent,
349                                 computePipelineConstructionType)
350     {
351         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
352         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(52, 1, 1)));
353         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 39, 1)));
354         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 78)));
355         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(4, 7, 11)));
356         m_subCases.push_back(SubCase(UVec3(2, 3, 4), UVec3(4, 7, 11)));
357     }
358 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const359     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
360                            const UVec3 &localInvocationID) const
361     {
362         DE_UNREF(numWorkGroups);
363         DE_UNREF(workGroupSize);
364         DE_UNREF(localInvocationID);
365         return workGroupID;
366     }
367 };
368 
369 class LocalInvocationIDCase : public ComputeBuiltinVarCase
370 {
371 public:
LocalInvocationIDCase(tcu::TestContext & context,bool readByComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)372     LocalInvocationIDCase(tcu::TestContext &context, bool readByComponent,
373                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
374         : ComputeBuiltinVarCase(context, "local_invocation_id", "gl_LocalInvocationID", glu::TYPE_UINT_VEC3,
375                                 readByComponent, computePipelineConstructionType)
376     {
377         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
378         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(2, 7, 3)));
379         m_subCases.push_back(SubCase(UVec3(2, 1, 1), UVec3(1, 1, 1)));
380         m_subCases.push_back(SubCase(UVec3(2, 1, 1), UVec3(1, 3, 5)));
381         m_subCases.push_back(SubCase(UVec3(1, 3, 1), UVec3(1, 1, 1)));
382         m_subCases.push_back(SubCase(UVec3(1, 1, 7), UVec3(1, 1, 1)));
383         m_subCases.push_back(SubCase(UVec3(1, 1, 7), UVec3(3, 3, 1)));
384         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(1, 1, 1)));
385         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(3, 1, 2)));
386     }
387 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const388     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
389                            const UVec3 &localInvocationID) const
390     {
391         DE_UNREF(numWorkGroups);
392         DE_UNREF(workGroupSize);
393         DE_UNREF(workGroupID);
394         return localInvocationID;
395     }
396 };
397 
398 class GlobalInvocationIDCase : public ComputeBuiltinVarCase
399 {
400 public:
GlobalInvocationIDCase(tcu::TestContext & context,bool readByComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)401     GlobalInvocationIDCase(tcu::TestContext &context, bool readByComponent,
402                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
403         : ComputeBuiltinVarCase(context, "global_invocation_id", "gl_GlobalInvocationID", glu::TYPE_UINT_VEC3,
404                                 readByComponent, computePipelineConstructionType)
405     {
406         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
407         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(52, 1, 1)));
408         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 39, 1)));
409         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 78)));
410         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(4, 7, 11)));
411         m_subCases.push_back(SubCase(UVec3(2, 3, 4), UVec3(4, 7, 11)));
412         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(1, 1, 1)));
413         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(3, 1, 2)));
414     }
415 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const416     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
417                            const UVec3 &localInvocationID) const
418     {
419         DE_UNREF(numWorkGroups);
420         return workGroupID * workGroupSize + localInvocationID;
421     }
422 };
423 
424 class LocalInvocationIndexCase : public ComputeBuiltinVarCase
425 {
426 public:
LocalInvocationIndexCase(tcu::TestContext & context,bool readByComponent,const vk::ComputePipelineConstructionType computePipelineConstructionType)427     LocalInvocationIndexCase(tcu::TestContext &context, bool readByComponent,
428                              const vk::ComputePipelineConstructionType computePipelineConstructionType)
429         : ComputeBuiltinVarCase(context, "local_invocation_index", "gl_LocalInvocationIndex", glu::TYPE_UINT,
430                                 readByComponent, computePipelineConstructionType)
431     {
432         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 1, 1)));
433         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(1, 39, 1)));
434         m_subCases.push_back(SubCase(UVec3(1, 1, 1), UVec3(4, 7, 11)));
435         m_subCases.push_back(SubCase(UVec3(2, 3, 4), UVec3(4, 7, 11)));
436         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(1, 1, 1)));
437         m_subCases.push_back(SubCase(UVec3(10, 3, 4), UVec3(3, 1, 2)));
438     }
439 
computeReference(const UVec3 & numWorkGroups,const UVec3 & workGroupSize,const UVec3 & workGroupID,const UVec3 & localInvocationID) const440     UVec3 computeReference(const UVec3 &numWorkGroups, const UVec3 &workGroupSize, const UVec3 &workGroupID,
441                            const UVec3 &localInvocationID) const
442     {
443         DE_UNREF(workGroupID);
444         DE_UNREF(numWorkGroups);
445         return UVec3(localInvocationID.z() * workGroupSize.x() * workGroupSize.y() +
446                          localInvocationID.y() * workGroupSize.x() + localInvocationID.x(),
447                      0, 0);
448     }
449 };
450 
ComputeBuiltinVarInstance(Context & context,const vector<SubCase> & subCases,const glu::DataType varType,const ComputeBuiltinVarCase * builtinVarCase,const vk::ComputePipelineConstructionType computePipelineConstructionType)451 ComputeBuiltinVarInstance::ComputeBuiltinVarInstance(
452     Context &context, const vector<SubCase> &subCases, const glu::DataType varType,
453     const ComputeBuiltinVarCase *builtinVarCase,
454     const vk::ComputePipelineConstructionType computePipelineConstructionType)
455     : vkt::TestInstance(context)
456     , m_device(m_context.getDevice())
457     , m_vki(m_context.getDeviceInterface())
458     , m_queue(context.getUniversalQueue())
459     , m_queueFamilyIndex(context.getUniversalQueueFamilyIndex())
460     , m_subCases(subCases)
461     , m_builtin_var_case(builtinVarCase)
462     , m_subCaseNdx(0)
463     , m_varType(varType)
464     , m_computePipelineConstructionType(computePipelineConstructionType)
465 {
466 }
467 
iterate(void)468 tcu::TestStatus ComputeBuiltinVarInstance::iterate(void)
469 {
470     std::ostringstream program_name;
471     program_name << s_prefixProgramName << m_subCaseNdx;
472 
473     const SubCase &subCase      = m_subCases[m_subCaseNdx];
474     const tcu::UVec3 globalSize = subCase.localSize() * subCase.numWorkGroups();
475     const tcu::UVec2 stride(globalSize[0] * globalSize[1], globalSize[0]);
476     const uint32_t sizeOfUniformBuffer = sizeof(stride);
477     const int numScalars               = glu::getDataTypeScalarSize(m_varType);
478     const uint32_t numInvocations      = subCase.localSize()[0] * subCase.localSize()[1] * subCase.localSize()[2] *
479                                     subCase.numWorkGroups()[0] * subCase.numWorkGroups()[1] *
480                                     subCase.numWorkGroups()[2];
481 
482     uint32_t resultBufferStride = 0;
483     switch (m_varType)
484     {
485     case glu::TYPE_UINT:
486         resultBufferStride = sizeof(uint32_t);
487         break;
488     case glu::TYPE_UINT_VEC2:
489         resultBufferStride = sizeof(tcu::UVec2);
490         break;
491     case glu::TYPE_UINT_VEC3:
492     case glu::TYPE_UINT_VEC4:
493         resultBufferStride = sizeof(tcu::UVec4);
494         break;
495     default:
496         DE_FATAL("Illegal data type");
497     }
498 
499     const uint32_t resultBufferSize = numInvocations * resultBufferStride;
500 
501     // Create result buffer
502     vk::BufferWithMemory uniformBuffer(m_vki, m_device, m_context.getDefaultAllocator(),
503                                        makeBufferCreateInfo(sizeOfUniformBuffer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
504                                        MemoryRequirement::HostVisible);
505     vk::BufferWithMemory resultBuffer(m_vki, m_device, m_context.getDefaultAllocator(),
506                                       makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
507                                       MemoryRequirement::HostVisible);
508 
509     {
510         const Allocation &alloc = uniformBuffer.getAllocation();
511         memcpy(alloc.getHostPtr(), &stride, sizeOfUniformBuffer);
512         flushAlloc(m_vki, m_device, alloc);
513     }
514 
515     // Create descriptorSetLayout
516     const Unique<VkDescriptorSetLayout> descriptorSetLayout(
517         DescriptorSetLayoutBuilder()
518             .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
519             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
520             .build(m_vki, m_device));
521 
522     ComputePipelineWrapper pipeline(m_vki, m_device, m_computePipelineConstructionType,
523                                     m_context.getBinaryCollection().get(program_name.str()));
524     pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
525     pipeline.buildPipeline();
526 
527     const Unique<VkDescriptorPool> descriptorPool(
528         DescriptorPoolBuilder()
529             .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
530             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
531             .build(m_vki, m_device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
532 
533     const VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
534         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
535 
536     const Unique<VkCommandPool> cmdPool(makeCommandPool(m_vki, m_device, m_queueFamilyIndex));
537     const Unique<VkCommandBuffer> cmdBuffer(
538         allocateCommandBuffer(m_vki, m_device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
539 
540     // Start recording commands
541     beginCommandBuffer(m_vki, *cmdBuffer);
542 
543     pipeline.bind(*cmdBuffer);
544 
545     // Create descriptor set
546     const Unique<VkDescriptorSet> descriptorSet(
547         makeDescriptorSet(m_vki, m_device, *descriptorPool, *descriptorSetLayout));
548 
549     const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, 0ull, resultBufferSize);
550     const VkDescriptorBufferInfo uniformDescriptorInfo =
551         makeDescriptorBufferInfo(*uniformBuffer, 0ull, sizeOfUniformBuffer);
552 
553     DescriptorSetUpdateBuilder()
554         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
555                      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformDescriptorInfo)
556         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
557                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo)
558         .update(m_vki, m_device);
559 
560     m_vki.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
561                                 &descriptorSet.get(), 0u, DE_NULL);
562 
563     // Dispatch indirect compute command
564     m_vki.cmdDispatch(*cmdBuffer, subCase.numWorkGroups()[0], subCase.numWorkGroups()[1], subCase.numWorkGroups()[2]);
565 
566     m_vki.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
567                              (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
568                              (const VkImageMemoryBarrier *)DE_NULL);
569 
570     // End recording commands
571     endCommandBuffer(m_vki, *cmdBuffer);
572 
573     // Wait for command buffer execution finish
574     submitCommandsAndWait(m_vki, m_device, m_queue, *cmdBuffer);
575 
576     const Allocation &resultAlloc = resultBuffer.getAllocation();
577     invalidateAlloc(m_vki, m_device, resultAlloc);
578 
579     const uint8_t *ptr = reinterpret_cast<uint8_t *>(resultAlloc.getHostPtr());
580 
581     int numFailed          = 0;
582     const int maxLogPrints = 10;
583 
584     tcu::TestContext &testCtx = m_context.getTestContext();
585 
586 #ifdef CTS_USES_VULKANSC
587     if (testCtx.getCommandLine().isSubProcess())
588 #endif // CTS_USES_VULKANSC
589     {
590         for (uint32_t groupZ = 0; groupZ < subCase.numWorkGroups().z(); groupZ++)
591             for (uint32_t groupY = 0; groupY < subCase.numWorkGroups().y(); groupY++)
592                 for (uint32_t groupX = 0; groupX < subCase.numWorkGroups().x(); groupX++)
593                     for (uint32_t localZ = 0; localZ < subCase.localSize().z(); localZ++)
594                         for (uint32_t localY = 0; localY < subCase.localSize().y(); localY++)
595                             for (uint32_t localX = 0; localX < subCase.localSize().x(); localX++)
596                             {
597                                 const UVec3 refGroupID(groupX, groupY, groupZ);
598                                 const UVec3 refLocalID(localX, localY, localZ);
599                                 const UVec3 refGlobalID = refGroupID * subCase.localSize() + refLocalID;
600 
601                                 const uint32_t refOffset =
602                                     stride.x() * refGlobalID.z() + stride.y() * refGlobalID.y() + refGlobalID.x();
603 
604                                 const UVec3 refValue = m_builtin_var_case->computeReference(
605                                     subCase.numWorkGroups(), subCase.localSize(), refGroupID, refLocalID);
606 
607                                 const uint32_t *resPtr = (const uint32_t *)(ptr + refOffset * resultBufferStride);
608                                 const UVec3 resValue   = readResultVec(resPtr, numScalars);
609 
610                                 if (!compareNumComponents(refValue, resValue, numScalars))
611                                 {
612                                     if (numFailed < maxLogPrints)
613                                         testCtx.getLog()
614                                             << TestLog::Message << "ERROR: comparison failed at offset " << refOffset
615                                             << ": expected " << LogComps(refValue, numScalars) << ", got "
616                                             << LogComps(resValue, numScalars) << TestLog::EndMessage;
617                                     else if (numFailed == maxLogPrints)
618                                         testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
619 
620                                     numFailed += 1;
621                                 }
622                             }
623     }
624 
625     testCtx.getLog() << TestLog::Message << (numInvocations - numFailed) << " / " << numInvocations << " values passed"
626                      << TestLog::EndMessage;
627 
628     if (numFailed > 0)
629         return tcu::TestStatus::fail("Comparison failed");
630 
631     m_subCaseNdx += 1;
632     return (m_subCaseNdx < (int)m_subCases.size()) ? tcu::TestStatus::incomplete() :
633                                                      tcu::TestStatus::pass("Comparison succeeded");
634 }
635 
636 class ComputeShaderBuiltinVarTests : public tcu::TestCaseGroup
637 {
638 public:
639     ComputeShaderBuiltinVarTests(tcu::TestContext &context,
640                                  vk::ComputePipelineConstructionType computePipelineConstructionType);
641 
642     void init(void);
643 
644 private:
645     ComputeShaderBuiltinVarTests(const ComputeShaderBuiltinVarTests &other);
646     ComputeShaderBuiltinVarTests &operator=(const ComputeShaderBuiltinVarTests &other);
647 
648     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
649 };
650 
ComputeShaderBuiltinVarTests(tcu::TestContext & context,vk::ComputePipelineConstructionType computePipelineConstructionType)651 ComputeShaderBuiltinVarTests::ComputeShaderBuiltinVarTests(
652     tcu::TestContext &context, vk::ComputePipelineConstructionType computePipelineConstructionType)
653     : TestCaseGroup(context, "builtin_var")
654     , m_computePipelineConstructionType(computePipelineConstructionType)
655 {
656 }
657 
init(void)658 void ComputeShaderBuiltinVarTests::init(void)
659 {
660     // Builtin variables with vector values should be read whole and by component.
661     for (int i = 0; i < 2; i++)
662     {
663         const bool readByComponent = (i != 0);
664         addChild(new NumWorkGroupsCase(this->getTestContext(), readByComponent, m_computePipelineConstructionType));
665         addChild(new WorkGroupSizeCase(this->getTestContext(), readByComponent, m_computePipelineConstructionType));
666         addChild(new WorkGroupIDCase(this->getTestContext(), readByComponent, m_computePipelineConstructionType));
667         addChild(new LocalInvocationIDCase(this->getTestContext(), readByComponent, m_computePipelineConstructionType));
668         addChild(
669             new GlobalInvocationIDCase(this->getTestContext(), readByComponent, m_computePipelineConstructionType));
670     }
671     // Local invocation index is already just a scalar.
672     addChild(new LocalInvocationIndexCase(this->getTestContext(), false, m_computePipelineConstructionType));
673 }
674 
675 } // namespace
676 
createComputeShaderBuiltinVarTests(tcu::TestContext & testCtx,vk::ComputePipelineConstructionType computePipelineConstructionType)677 tcu::TestCaseGroup *createComputeShaderBuiltinVarTests(
678     tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
679 {
680     return new ComputeShaderBuiltinVarTests(testCtx, computePipelineConstructionType);
681 }
682 
683 } // namespace compute
684 } // namespace vkt
685