xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/subgroups/vktSubgroupsTestsUtils.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "tcuFloat.hpp"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
// Selects between the two "compute-like" shader kinds named by its enumerators.
enum class ComputeLike
{
    COMPUTE = 0, // Compute variant.
    MESH    = 1  // Mesh variant.
};
51 
// Returns the largest width value, which is fixed at 1024.
// NOTE(review): presumably the upper bound for loops stepping with getNextWidth() - confirm against callers.
uint32_t getMaxWidth()
{
    const uint32_t maxWidth = 1024u;
    return maxWidth;
}
56 
// Returns the width to try after 'width'.
//
// Widths below 128 (the max subgroup size) advance one at a time so that every
// value up to 128 is visited. From 128 upwards the width doubles on each step,
// which keeps the number of tested widths (and the testing time) small.
uint32_t getNextWidth(const uint32_t width)
{
    const uint32_t exhaustiveLimit = 128u;

    if (width >= exhaustiveLimit)
        return width * 2;

    return width + 1;
}
70 
getFormatSizeInBytes(const VkFormat format)71 uint32_t getFormatSizeInBytes(const VkFormat format)
72 {
73     switch (format)
74     {
75     default:
76         DE_FATAL("Unhandled format!");
77         return 0;
78     case VK_FORMAT_R8_SINT:
79     case VK_FORMAT_R8_UINT:
80         return static_cast<uint32_t>(sizeof(int8_t));
81     case VK_FORMAT_R8G8_SINT:
82     case VK_FORMAT_R8G8_UINT:
83         return static_cast<uint32_t>(sizeof(int8_t) * 2);
84     case VK_FORMAT_R8G8B8_SINT:
85     case VK_FORMAT_R8G8B8_UINT:
86     case VK_FORMAT_R8G8B8A8_SINT:
87     case VK_FORMAT_R8G8B8A8_UINT:
88         return static_cast<uint32_t>(sizeof(int8_t) * 4);
89     case VK_FORMAT_R16_SINT:
90     case VK_FORMAT_R16_UINT:
91     case VK_FORMAT_R16_SFLOAT:
92         return static_cast<uint32_t>(sizeof(int16_t));
93     case VK_FORMAT_R16G16_SINT:
94     case VK_FORMAT_R16G16_UINT:
95     case VK_FORMAT_R16G16_SFLOAT:
96         return static_cast<uint32_t>(sizeof(int16_t) * 2);
97     case VK_FORMAT_R16G16B16_UINT:
98     case VK_FORMAT_R16G16B16_SINT:
99     case VK_FORMAT_R16G16B16_SFLOAT:
100     case VK_FORMAT_R16G16B16A16_SINT:
101     case VK_FORMAT_R16G16B16A16_UINT:
102     case VK_FORMAT_R16G16B16A16_SFLOAT:
103         return static_cast<uint32_t>(sizeof(int16_t) * 4);
104     case VK_FORMAT_R32_SINT:
105     case VK_FORMAT_R32_UINT:
106     case VK_FORMAT_R32_SFLOAT:
107         return static_cast<uint32_t>(sizeof(int32_t));
108     case VK_FORMAT_R32G32_SINT:
109     case VK_FORMAT_R32G32_UINT:
110     case VK_FORMAT_R32G32_SFLOAT:
111         return static_cast<uint32_t>(sizeof(int32_t) * 2);
112     case VK_FORMAT_R32G32B32_SINT:
113     case VK_FORMAT_R32G32B32_UINT:
114     case VK_FORMAT_R32G32B32_SFLOAT:
115     case VK_FORMAT_R32G32B32A32_SINT:
116     case VK_FORMAT_R32G32B32A32_UINT:
117     case VK_FORMAT_R32G32B32A32_SFLOAT:
118         return static_cast<uint32_t>(sizeof(int32_t) * 4);
119     case VK_FORMAT_R64_SINT:
120     case VK_FORMAT_R64_UINT:
121     case VK_FORMAT_R64_SFLOAT:
122         return static_cast<uint32_t>(sizeof(int64_t));
123     case VK_FORMAT_R64G64_SINT:
124     case VK_FORMAT_R64G64_UINT:
125     case VK_FORMAT_R64G64_SFLOAT:
126         return static_cast<uint32_t>(sizeof(int64_t) * 2);
127     case VK_FORMAT_R64G64B64_SINT:
128     case VK_FORMAT_R64G64B64_UINT:
129     case VK_FORMAT_R64G64B64_SFLOAT:
130     case VK_FORMAT_R64G64B64A64_SINT:
131     case VK_FORMAT_R64G64B64A64_UINT:
132     case VK_FORMAT_R64G64B64A64_SFLOAT:
133         return static_cast<uint32_t>(sizeof(int64_t) * 4);
134     // The below formats are used to represent bool and bvec* types. These
135     // types are passed to the shader as int and ivec* types, before the
136     // calculations are done as booleans. We need a distinct type here so
137     // that the shader generators can switch on it and generate the correct
138     // shader source for testing.
139     case VK_FORMAT_R8_USCALED:
140         return static_cast<uint32_t>(sizeof(int32_t));
141     case VK_FORMAT_R8G8_USCALED:
142         return static_cast<uint32_t>(sizeof(int32_t) * 2);
143     case VK_FORMAT_R8G8B8_USCALED:
144     case VK_FORMAT_R8G8B8A8_USCALED:
145         return static_cast<uint32_t>(sizeof(int32_t) * 4);
146     }
147 }
148 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)149 uint32_t getElementSizeInBytes(const VkFormat format, const subgroups::SSBOData::InputDataLayoutType layout)
150 {
151     const uint32_t bytes = getFormatSizeInBytes(format);
152 
153     if (layout == subgroups::SSBOData::LayoutStd140)
154         return bytes < 16 ? 16 : bytes;
155     else
156         return bytes;
157 }
158 
makeRenderPass(Context & context,VkFormat format)159 Move<VkRenderPass> makeRenderPass(Context &context, VkFormat format)
160 {
161     const VkAttachmentReference colorReference    = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
162     const VkSubpassDescription subpassDescription = {
163         0u,                              //  VkSubpassDescriptionFlags flags;
164         VK_PIPELINE_BIND_POINT_GRAPHICS, //  VkPipelineBindPoint pipelineBindPoint;
165         0,                               //  uint32_t inputAttachmentCount;
166         DE_NULL,                         //  const VkAttachmentReference* pInputAttachments;
167         1,                               //  uint32_t colorAttachmentCount;
168         &colorReference,                 //  const VkAttachmentReference* pColorAttachments;
169         DE_NULL,                         //  const VkAttachmentReference* pResolveAttachments;
170         DE_NULL,                         //  const VkAttachmentReference* pDepthStencilAttachment;
171         0,                               //  uint32_t preserveAttachmentCount;
172         DE_NULL                          //  const uint32_t* pPreserveAttachments;
173     };
174     const VkSubpassDependency subpassDependencies[2] = {
175         {
176             VK_SUBPASS_EXTERNAL,                           //  uint32_t srcSubpass;
177             0u,                                            //  uint32_t dstSubpass;
178             VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          //  VkPipelineStageFlags srcStageMask;
179             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, //  VkPipelineStageFlags dstStageMask;
180             VK_ACCESS_MEMORY_READ_BIT,                     //  VkAccessFlags srcAccessMask;
181             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, //  VkAccessFlags dstAccessMask;
182             VK_DEPENDENCY_BY_REGION_BIT //  VkDependencyFlags dependencyFlags;
183         },
184         {
185             0u,                                            //  uint32_t srcSubpass;
186             VK_SUBPASS_EXTERNAL,                           //  uint32_t dstSubpass;
187             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, //  VkPipelineStageFlags srcStageMask;
188             VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          //  VkPipelineStageFlags dstStageMask;
189             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, //  VkAccessFlags srcAccessMask;
190             VK_ACCESS_MEMORY_READ_BIT,                                                  //  VkAccessFlags dstAccessMask;
191             VK_DEPENDENCY_BY_REGION_BIT //  VkDependencyFlags dependencyFlags;
192         },
193     };
194     const VkAttachmentDescription attachmentDescription = {
195         0u,                                  //  VkAttachmentDescriptionFlags flags;
196         format,                              //  VkFormat format;
197         VK_SAMPLE_COUNT_1_BIT,               //  VkSampleCountFlagBits samples;
198         VK_ATTACHMENT_LOAD_OP_CLEAR,         //  VkAttachmentLoadOp loadOp;
199         VK_ATTACHMENT_STORE_OP_STORE,        //  VkAttachmentStoreOp storeOp;
200         VK_ATTACHMENT_LOAD_OP_DONT_CARE,     //  VkAttachmentLoadOp stencilLoadOp;
201         VK_ATTACHMENT_STORE_OP_DONT_CARE,    //  VkAttachmentStoreOp stencilStoreOp;
202         VK_IMAGE_LAYOUT_UNDEFINED,           //  VkImageLayout initialLayout;
203         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL //  VkImageLayout finalLayout;
204     };
205     const VkRenderPassCreateInfo renderPassCreateInfo = {
206         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, //  VkStructureType sType;
207         DE_NULL,                                   //  const void* pNext;
208         0u,                                        //  VkRenderPassCreateFlags flags;
209         1,                                         //  uint32_t attachmentCount;
210         &attachmentDescription,                    //  const VkAttachmentDescription* pAttachments;
211         1,                                         //  uint32_t subpassCount;
212         &subpassDescription,                       //  const VkSubpassDescription* pSubpasses;
213         2,                                         //  uint32_t dependencyCount;
214         subpassDependencies                        //  const VkSubpassDependency* pDependencies;
215     };
216 
217     return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
218 }
219 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const uint32_t subpass,const uint32_t patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])220 Move<VkPipeline> makeGraphicsPipeline(
221     const DeviceInterface &vk, const VkDevice device, const VkPipelineLayout pipelineLayout,
222     const VkShaderModule vertexShaderModule, const VkShaderModule tessellationControlShaderModule,
223     const VkShaderModule tessellationEvalShaderModule, const VkShaderModule geometryShaderModule,
224     const VkShaderModule fragmentShaderModule, const VkRenderPass renderPass, const std::vector<VkViewport> &viewports,
225     const std::vector<VkRect2D> &scissors, const VkPrimitiveTopology topology, const uint32_t subpass,
226     const uint32_t patchControlPoints, const VkPipelineVertexInputStateCreateInfo *vertexInputStateCreateInfo,
227     const VkPipelineRasterizationStateCreateInfo *rasterizationStateCreateInfo,
228     const VkPipelineMultisampleStateCreateInfo *multisampleStateCreateInfo,
229     const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo,
230     const VkPipelineColorBlendStateCreateInfo *colorBlendStateCreateInfo,
231     const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfo, const uint32_t vertexShaderStageCreateFlags,
232     const uint32_t tessellationControlShaderStageCreateFlags, const uint32_t tessellationEvalShaderStageCreateFlags,
233     const uint32_t geometryShaderStageCreateFlags, const uint32_t fragmentShaderStageCreateFlags,
234     const uint32_t requiredSubgroupSize[5])
235 {
236     const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
237     const bool hasTessellation =
238         (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
239 
240     VkPipelineShaderStageCreateInfo stageCreateInfo = {
241         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType                     sType
242         DE_NULL,                                             // const void*                         pNext
243         0u,                                                  // VkPipelineShaderStageCreateFlags    flags
244         VK_SHADER_STAGE_VERTEX_BIT,                          // VkShaderStageFlagBits               stage
245         DE_NULL,                                             // VkShaderModule                      module
246         "main",                                              // const char*                         pName
247         DE_NULL                                              // const VkSpecializationInfo*         pSpecializationInfo
248     };
249 
250     std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
251 
252     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] = {
253         {
254             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
255             DE_NULL,
256             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
257         },
258         {
259             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
260             DE_NULL,
261             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
262         },
263         {
264             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
265             DE_NULL,
266             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
267         },
268         {
269             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270             DE_NULL,
271             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
272         },
273         {
274             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275             DE_NULL,
276             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
277         },
278     };
279 
280     {
281         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ?
282                                      &requiredSubgroupSizeCreateInfo[0] :
283                                      DE_NULL;
284         stageCreateInfo.flags  = vertexShaderStageCreateFlags;
285         stageCreateInfo.stage  = VK_SHADER_STAGE_VERTEX_BIT;
286         stageCreateInfo.module = vertexShaderModule;
287         pipelineShaderStageParams.push_back(stageCreateInfo);
288     }
289 
290     if (tessellationControlShaderModule != DE_NULL)
291     {
292         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ?
293                                      &requiredSubgroupSizeCreateInfo[1] :
294                                      DE_NULL;
295         stageCreateInfo.flags  = tessellationControlShaderStageCreateFlags;
296         stageCreateInfo.stage  = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
297         stageCreateInfo.module = tessellationControlShaderModule;
298         pipelineShaderStageParams.push_back(stageCreateInfo);
299     }
300 
301     if (tessellationEvalShaderModule != DE_NULL)
302     {
303         stageCreateInfo.pNext =
304             (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ?
305                 &requiredSubgroupSizeCreateInfo[2] :
306                 DE_NULL;
307         stageCreateInfo.flags  = tessellationEvalShaderStageCreateFlags;
308         stageCreateInfo.stage  = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
309         stageCreateInfo.module = tessellationEvalShaderModule;
310         pipelineShaderStageParams.push_back(stageCreateInfo);
311     }
312 
313     if (geometryShaderModule != DE_NULL)
314     {
315         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ?
316                                      &requiredSubgroupSizeCreateInfo[3] :
317                                      DE_NULL;
318         stageCreateInfo.flags  = geometryShaderStageCreateFlags;
319         stageCreateInfo.stage  = VK_SHADER_STAGE_GEOMETRY_BIT;
320         stageCreateInfo.module = geometryShaderModule;
321         pipelineShaderStageParams.push_back(stageCreateInfo);
322     }
323 
324     if (fragmentShaderModule != DE_NULL)
325     {
326         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ?
327                                      &requiredSubgroupSizeCreateInfo[4] :
328                                      DE_NULL;
329         stageCreateInfo.flags  = fragmentShaderStageCreateFlags;
330         stageCreateInfo.stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
331         stageCreateInfo.module = fragmentShaderModule;
332         pipelineShaderStageParams.push_back(stageCreateInfo);
333     }
334 
335     const VkVertexInputBindingDescription vertexInputBindingDescription = {
336         0u,                          // uint32_t             binding
337         sizeof(tcu::Vec4),           // uint32_t             stride
338         VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate    inputRate
339     };
340 
341     const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
342         0u,                            // uint32_t    location
343         0u,                            // uint32_t    binding
344         VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat    format
345         0u                             // uint32_t    offset
346     };
347 
348     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault = {
349         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType                             sType
350         DE_NULL,                                                   // const void*                                 pNext
351         (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags       flags
352         1u,                              // uint32_t                                    vertexBindingDescriptionCount
353         &vertexInputBindingDescription,  // const VkVertexInputBindingDescription*      pVertexBindingDescriptions
354         1u,                              // uint32_t                                    vertexAttributeDescriptionCount
355         &vertexInputAttributeDescription // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
356     };
357 
358     const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
359         VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType                            sType
360         DE_NULL,                                                     // const void*                                pNext
361         0u,                                                          // VkPipelineInputAssemblyStateCreateFlags    flags
362         topology, // VkPrimitiveTopology                        topology
363         VK_FALSE  // VkBool32                                   primitiveRestartEnable
364     };
365 
366     const VkPipelineTessellationStateCreateInfo tessStateCreateInfo = {
367         VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType                           sType
368         DE_NULL,                                                   // const void*                               pNext
369         0u,                                                        // VkPipelineTessellationStateCreateFlags    flags
370         patchControlPoints // uint32_t                                  patchControlPoints
371     };
372 
373     const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
374         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                             sType
375         DE_NULL,                                               // const void*                                 pNext
376         (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags          flags
377         viewports.empty() ? 1u :
378                             (uint32_t)viewports.size(),     // uint32_t                                    viewportCount
379         viewports.empty() ? DE_NULL : &viewports[0],        // const VkViewport*                           pViewports
380         viewports.empty() ? 1u : (uint32_t)scissors.size(), // uint32_t                                    scissorCount
381         scissors.empty() ? DE_NULL : &scissors[0]           // const VkRect2D*                             pScissors
382     };
383 
384     const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault = {
385         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType                            sType
386         DE_NULL,                                                    // const void*                                pNext
387         0u,                                                         // VkPipelineRasterizationStateCreateFlags    flags
388         VK_FALSE,                        // VkBool32                                   depthClampEnable
389         disableRasterization,            // VkBool32                                   rasterizerDiscardEnable
390         VK_POLYGON_MODE_FILL,            // VkPolygonMode                              polygonMode
391         VK_CULL_MODE_NONE,               // VkCullModeFlags                            cullMode
392         VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace                                frontFace
393         VK_FALSE,                        // VkBool32                                   depthBiasEnable
394         0.0f,                            // float                                      depthBiasConstantFactor
395         0.0f,                            // float                                      depthBiasClamp
396         0.0f,                            // float                                      depthBiasSlopeFactor
397         1.0f                             // float                                      lineWidth
398     };
399 
400     const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault = {
401         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                          sType
402         DE_NULL,                                                  // const void*                              pNext
403         0u,                                                       // VkPipelineMultisampleStateCreateFlags    flags
404         VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits                    rasterizationSamples
405         VK_FALSE,              // VkBool32                                 sampleShadingEnable
406         1.0f,                  // float                                    minSampleShading
407         DE_NULL,               // const VkSampleMask*                      pSampleMask
408         VK_FALSE,              // VkBool32                                 alphaToCoverageEnable
409         VK_FALSE               // VkBool32                                 alphaToOneEnable
410     };
411 
412     const VkStencilOpState stencilOpState = {
413         VK_STENCIL_OP_KEEP,  // VkStencilOp    failOp
414         VK_STENCIL_OP_KEEP,  // VkStencilOp    passOp
415         VK_STENCIL_OP_KEEP,  // VkStencilOp    depthFailOp
416         VK_COMPARE_OP_NEVER, // VkCompareOp    compareOp
417         0,                   // uint32_t       compareMask
418         0,                   // uint32_t       writeMask
419         0                    // uint32_t       reference
420     };
421 
422     const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault = {
423         VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType                          sType
424         DE_NULL,                                                    // const void*                              pNext
425         0u,                                                         // VkPipelineDepthStencilStateCreateFlags   flags
426         VK_FALSE,                    // VkBool32                                 depthTestEnable
427         VK_FALSE,                    // VkBool32                                 depthWriteEnable
428         VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp                              depthCompareOp
429         VK_FALSE,                    // VkBool32                                 depthBoundsTestEnable
430         VK_FALSE,                    // VkBool32                                 stencilTestEnable
431         stencilOpState,              // VkStencilOpState                         front
432         stencilOpState,              // VkStencilOpState                         back
433         0.0f,                        // float                                    minDepthBounds
434         1.0f,                        // float                                    maxDepthBounds
435     };
436 
437     const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
438         VK_FALSE,                // VkBool32                 blendEnable
439         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            srcColorBlendFactor
440         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstColorBlendFactor
441         VK_BLEND_OP_ADD,         // VkBlendOp                colorBlendOp
442         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            srcAlphaBlendFactor
443         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstAlphaBlendFactor
444         VK_BLEND_OP_ADD,         // VkBlendOp                alphaBlendOp
445         VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags    colorWriteMask
446             | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT};
447 
448     const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault = {
449         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType                               sType
450         DE_NULL,                                                  // const void*                                   pNext
451         0u,                                                       // VkPipelineColorBlendStateCreateFlags          flags
452         VK_FALSE,                   // VkBool32                                      logicOpEnable
453         VK_LOGIC_OP_CLEAR,          // VkLogicOp                                     logicOp
454         1u,                         // uint32_t                                      attachmentCount
455         &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState*    pAttachments
456         {0.0f, 0.0f, 0.0f, 0.0f}    // float                                         blendConstants[4]
457     };
458 
459     std::vector<VkDynamicState> dynamicStates;
460 
461     if (viewports.empty())
462         dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
463     if (scissors.empty())
464         dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
465 
466     const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault = {
467         VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType                      sType
468         DE_NULL,                                              // const void*                          pNext
469         0u,                                                   // VkPipelineDynamicStateCreateFlags    flags
470         (uint32_t)dynamicStates.size(),                       // uint32_t                             dynamicStateCount
471         dynamicStates.empty() ? DE_NULL : &dynamicStates[0]   // const VkDynamicState*                pDynamicStates
472     };
473 
474     const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfoDefaultPtr =
475         dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
476 
477     const VkGraphicsPipelineCreateInfo pipelineCreateInfo = {
478         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType                                  sType
479         DE_NULL,                                         // const void*                                      pNext
480         0u,                                              // VkPipelineCreateFlags                            flags
481         (uint32_t)pipelineShaderStageParams.size(),      // uint32_t                                         stageCount
482         &pipelineShaderStageParams[0],                   // const VkPipelineShaderStageCreateInfo*           pStages
483         vertexInputStateCreateInfo ?
484             vertexInputStateCreateInfo :
485             &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
486         &inputAssemblyStateCreateInfo,          // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
487         hasTessellation ? &tessStateCreateInfo :
488                           DE_NULL, // const VkPipelineTessellationStateCreateInfo*     pTessellationState
489         &viewportStateCreateInfo,  // const VkPipelineViewportStateCreateInfo*         pViewportState
490         rasterizationStateCreateInfo ?
491             rasterizationStateCreateInfo :
492             &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
493         multisampleStateCreateInfo ?
494             multisampleStateCreateInfo :
495             &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
496         depthStencilStateCreateInfo ?
497             depthStencilStateCreateInfo :
498             &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
499         colorBlendStateCreateInfo ?
500             colorBlendStateCreateInfo :
501             &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
502         dynamicStateCreateInfo ?
503             dynamicStateCreateInfo :
504             dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo*          pDynamicState
505         pipelineLayout,                       // VkPipelineLayout                                 layout
506         renderPass,                           // VkRenderPass                                     renderPass
507         subpass,                              // uint32_t                                         subpass
508         DE_NULL,                              // VkPipeline                                       basePipelineHandle
509         0                                     // int32_t                                          basePipelineIndex;
510     };
511 
512     return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
513 }
514 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const uint32_t vertexShaderStageCreateFlags=0u,const uint32_t tessellationControlShaderStageCreateFlags=0u,const uint32_t tessellationEvalShaderStageCreateFlags=0u,const uint32_t geometryShaderStageCreateFlags=0u,const uint32_t fragmentShaderStageCreateFlags=0u,const uint32_t requiredSubgroupSize[5]=DE_NULL)515 Move<VkPipeline> makeGraphicsPipeline(
516     Context &context, const VkPipelineLayout pipelineLayout, const VkShaderStageFlags stages,
517     const VkShaderModule vertexShaderModule, const VkShaderModule fragmentShaderModule,
518     const VkShaderModule geometryShaderModule, const VkShaderModule tessellationControlModule,
519     const VkShaderModule tessellationEvaluationModule, const VkRenderPass renderPass,
520     const VkPrimitiveTopology topology                                        = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
521     const VkVertexInputBindingDescription *vertexInputBindingDescription      = DE_NULL,
522     const VkVertexInputAttributeDescription *vertexInputAttributeDescriptions = DE_NULL,
523     const bool frameBufferTests = false, const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
524     const uint32_t vertexShaderStageCreateFlags = 0u, const uint32_t tessellationControlShaderStageCreateFlags = 0u,
525     const uint32_t tessellationEvalShaderStageCreateFlags = 0u, const uint32_t geometryShaderStageCreateFlags = 0u,
526     const uint32_t fragmentShaderStageCreateFlags = 0u, const uint32_t requiredSubgroupSize[5] = DE_NULL)
527 {
528     const std::vector<VkViewport> noViewports;
529     const std::vector<VkRect2D> noScissors;
530     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
531         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
532         DE_NULL,                                                   // const void* pNext;
533         0u,                                                        // VkPipelineVertexInputStateCreateFlags flags;
534         vertexInputBindingDescription == DE_NULL ? 0u : 1u,        // uint32_t vertexBindingDescriptionCount;
535         vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
536         vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // uint32_t vertexAttributeDescriptionCount;
537         vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
538     };
539     const uint32_t numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
540     const VkColorComponentFlags colorComponent =
541         numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
542         numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
543         numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
544                            VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
545                                VK_COLOR_COMPONENT_A_BIT;
546     const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
547         VK_FALSE,             //  VkBool32 blendEnable;
548         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor srcColorBlendFactor;
549         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor dstColorBlendFactor;
550         VK_BLEND_OP_ADD,      //  VkBlendOp colorBlendOp;
551         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor srcAlphaBlendFactor;
552         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor dstAlphaBlendFactor;
553         VK_BLEND_OP_ADD,      //  VkBlendOp alphaBlendOp;
554         colorComponent        //  VkColorComponentFlags colorWriteMask;
555     };
556     const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
557         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, //  VkStructureType sType;
558         DE_NULL,                                                  //  const void* pNext;
559         0u,                                                       //  VkPipelineColorBlendStateCreateFlags flags;
560         VK_FALSE,                                                 //  VkBool32 logicOpEnable;
561         VK_LOGIC_OP_CLEAR,                                        //  VkLogicOp logicOp;
562         1,                                                        //  uint32_t attachmentCount;
563         &colorBlendAttachmentState, //  const VkPipelineColorBlendAttachmentState* pAttachments;
564         {0.0f, 0.0f, 0.0f, 0.0f}    //  float blendConstants[4];
565     };
566     const uint32_t patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
567 
568     return makeGraphicsPipeline(
569         context.getDeviceInterface(), // const DeviceInterface&                        vk
570         context.getDevice(),          // const VkDevice                                device
571         pipelineLayout,               // const VkPipelineLayout                        pipelineLayout
572         vertexShaderModule,           // const VkShaderModule                          vertexShaderModule
573         tessellationControlModule,    // const VkShaderModule                          tessellationControlShaderModule
574         tessellationEvaluationModule, // const VkShaderModule                          tessellationEvalShaderModule
575         geometryShaderModule,         // const VkShaderModule                          geometryShaderModule
576         fragmentShaderModule,         // const VkShaderModule                          fragmentShaderModule
577         renderPass,                   // const VkRenderPass                            renderPass
578         noViewports,                  // const std::vector<VkViewport>&                viewports
579         noScissors,                   // const std::vector<VkRect2D>&                  scissors
580         topology,                     // const VkPrimitiveTopology                     topology
581         0u,                           // const uint32_t                                subpass
582         patchControlPoints,           // const uint32_t                                patchControlPoints
583         &vertexInputStateCreateInfo,  // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
584         DE_NULL,                      // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
585         DE_NULL,                      // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
586         DE_NULL,                      // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
587         &colorBlendStateCreateInfo,   // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
588         DE_NULL,                      // const VkPipelineDynamicStateCreateInfo*
589         vertexShaderStageCreateFlags, // const uint32_t                                 vertexShaderStageCreateFlags,
590         tessellationControlShaderStageCreateFlags, // const uint32_t                     tessellationControlShaderStageCreateFlags
591         tessellationEvalShaderStageCreateFlags, // const uint32_t                     tessellationEvalShaderStageCreateFlags
592         geometryShaderStageCreateFlags, // const uint32_t                                 geometryShaderStageCreateFlags
593         fragmentShaderStageCreateFlags, // const uint32_t                                 fragmentShaderStageCreateFlags
594         requiredSubgroupSize);          // const uint32_t                                 requiredSubgroupSize[5]
595 }
596 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)597 Move<VkCommandBuffer> makeCommandBuffer(Context &context, const VkCommandPool commandPool)
598 {
599     const VkCommandBufferAllocateInfo bufferAllocateParams = {
600         VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
601         DE_NULL,                                        // const void* pNext;
602         commandPool,                                    // VkCommandPool commandPool;
603         VK_COMMAND_BUFFER_LEVEL_PRIMARY,                // VkCommandBufferLevel level;
604         1u,                                             // uint32_t bufferCount;
605     };
606     return allocateCommandBuffer(context.getDeviceInterface(), context.getDevice(), &bufferAllocateParams);
607 }
608 
609 struct Buffer;
610 struct Image;
611 
612 struct BufferOrImage
613 {
isImage__anon39bd43f10111::BufferOrImage614     bool isImage() const
615     {
616         return m_isImage;
617     }
618 
getAsBuffer__anon39bd43f10111::BufferOrImage619     Buffer *getAsBuffer()
620     {
621         if (m_isImage)
622             DE_FATAL("Trying to get a buffer as an image!");
623         return reinterpret_cast<Buffer *>(this);
624     }
625 
getAsImage__anon39bd43f10111::BufferOrImage626     Image *getAsImage()
627     {
628         if (!m_isImage)
629             DE_FATAL("Trying to get an image as a buffer!");
630         return reinterpret_cast<Image *>(this);
631     }
632 
getType__anon39bd43f10111::BufferOrImage633     virtual VkDescriptorType getType() const
634     {
635         if (m_isImage)
636         {
637             return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
638         }
639         else
640         {
641             return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
642         }
643     }
644 
getAllocation__anon39bd43f10111::BufferOrImage645     Allocation &getAllocation() const
646     {
647         return *m_allocation;
648     }
649 
~BufferOrImage__anon39bd43f10111::BufferOrImage650     virtual ~BufferOrImage()
651     {
652     }
653 
654 protected:
BufferOrImage__anon39bd43f10111::BufferOrImage655     explicit BufferOrImage(bool image) : m_isImage(image)
656     {
657     }
658 
659     bool m_isImage;
660     de::details::MovePtr<Allocation> m_allocation;
661 };
662 
663 struct Buffer : public BufferOrImage
664 {
Buffer__anon39bd43f10111::Buffer665     explicit Buffer(Context &context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
666         : BufferOrImage(false)
667         , m_sizeInBytes(sizeInBytes)
668         , m_usage(usage)
669     {
670         const DeviceInterface &vkd = context.getDeviceInterface();
671         const VkDevice device      = context.getDevice();
672 
673         const vk::VkBufferCreateInfo bufferCreateInfo = {
674             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
675             DE_NULL,
676             0u,
677             m_sizeInBytes,
678             m_usage,
679             VK_SHARING_MODE_EXCLUSIVE,
680             0u,
681             DE_NULL,
682         };
683         m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
684 
685         VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
686 
687         m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
688         VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
689     }
690 
getType__anon39bd43f10111::Buffer691     virtual VkDescriptorType getType() const
692     {
693         if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
694         {
695             return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
696         }
697         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
698     }
699 
getBuffer__anon39bd43f10111::Buffer700     VkBuffer getBuffer() const
701     {
702         return *m_buffer;
703     }
704 
getBufferPtr__anon39bd43f10111::Buffer705     const VkBuffer *getBufferPtr() const
706     {
707         return &(*m_buffer);
708     }
709 
getSize__anon39bd43f10111::Buffer710     VkDeviceSize getSize() const
711     {
712         return m_sizeInBytes;
713     }
714 
715 private:
716     Move<VkBuffer> m_buffer;
717     VkDeviceSize m_sizeInBytes;
718     const VkBufferUsageFlags m_usage;
719 };
720 
721 struct Image : public BufferOrImage
722 {
Image__anon39bd43f10111::Image723     explicit Image(Context &context, uint32_t width, uint32_t height, VkFormat format,
724                    VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
725         : BufferOrImage(true)
726     {
727         const DeviceInterface &vk       = context.getDeviceInterface();
728         const VkDevice device           = context.getDevice();
729         const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
730 
731         const VkImageCreateInfo imageCreateInfo = {
732             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, //  VkStructureType sType;
733             DE_NULL,                             //  const void* pNext;
734             0,                                   //  VkImageCreateFlags flags;
735             VK_IMAGE_TYPE_2D,                    //  VkImageType imageType;
736             format,                              //  VkFormat format;
737             {width, height, 1},                  //  VkExtent3D extent;
738             1,                                   //  uint32_t mipLevels;
739             1,                                   //  uint32_t arrayLayers;
740             VK_SAMPLE_COUNT_1_BIT,               //  VkSampleCountFlagBits samples;
741             VK_IMAGE_TILING_OPTIMAL,             //  VkImageTiling tiling;
742             usage,                               //  VkImageUsageFlags usage;
743             VK_SHARING_MODE_EXCLUSIVE,           //  VkSharingMode sharingMode;
744             0u,                                  //  uint32_t queueFamilyIndexCount;
745             DE_NULL,                             //  const uint32_t* pQueueFamilyIndices;
746             VK_IMAGE_LAYOUT_UNDEFINED            //  VkImageLayout initialLayout;
747         };
748 
749         const VkComponentMapping componentMapping = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
750                                                      VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
751 
752         const VkImageSubresourceRange subresourceRange = {
753             VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags    aspectMask
754             0u,                        //uint32_t                baseMipLevel
755             1u,                        //uint32_t                levelCount
756             0u,                        //uint32_t                baseArrayLayer
757             1u                         //uint32_t                layerCount
758         };
759 
760         const VkSamplerCreateInfo samplerCreateInfo = {
761             VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,   //  VkStructureType sType;
762             DE_NULL,                                 //  const void* pNext;
763             0u,                                      //  VkSamplerCreateFlags flags;
764             VK_FILTER_NEAREST,                       //  VkFilter magFilter;
765             VK_FILTER_NEAREST,                       //  VkFilter minFilter;
766             VK_SAMPLER_MIPMAP_MODE_NEAREST,          //  VkSamplerMipmapMode mipmapMode;
767             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeU;
768             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeV;
769             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeW;
770             0.0f,                                    //  float mipLodBias;
771             VK_FALSE,                                //  VkBool32 anisotropyEnable;
772             1.0f,                                    //  float maxAnisotropy;
773             false,                                   //  VkBool32 compareEnable;
774             VK_COMPARE_OP_ALWAYS,                    //  VkCompareOp compareOp;
775             0.0f,                                    //  float minLod;
776             0.0f,                                    //  float maxLod;
777             VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, //  VkBorderColor borderColor;
778             VK_FALSE,                                //  VkBool32 unnormalizedCoordinates;
779         };
780 
781         m_image = createImage(vk, device, &imageCreateInfo);
782 
783         VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
784 
785         req.size *= 2;
786         m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
787 
788         VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
789 
790         const VkImageViewCreateInfo imageViewCreateInfo = {
791             VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, //  VkStructureType sType;
792             DE_NULL,                                  //  const void* pNext;
793             0,                                        //  VkImageViewCreateFlags flags;
794             *m_image,                                 //  VkImage image;
795             VK_IMAGE_VIEW_TYPE_2D,                    //  VkImageViewType viewType;
796             imageCreateInfo.format,                   //  VkFormat format;
797             componentMapping,                         //  VkComponentMapping components;
798             subresourceRange                          //  VkImageSubresourceRange subresourceRange;
799         };
800 
801         m_imageView = createImageView(vk, device, &imageViewCreateInfo);
802         m_sampler   = createSampler(vk, device, &samplerCreateInfo);
803 
804         // Transition input image layouts
805         {
806             const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
807             const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
808 
809             beginCommandBuffer(vk, *cmdBuffer);
810 
811             const VkImageMemoryBarrier imageBarrier =
812                 makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
813                                        VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
814 
815             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
816                                   (VkDependencyFlags)0, 0u, (const VkMemoryBarrier *)DE_NULL, 0u,
817                                   (const VkBufferMemoryBarrier *)DE_NULL, 1u, &imageBarrier);
818 
819             endCommandBuffer(vk, *cmdBuffer);
820             submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
821         }
822     }
823 
getImage__anon39bd43f10111::Image824     VkImage getImage() const
825     {
826         return *m_image;
827     }
828 
getImageView__anon39bd43f10111::Image829     VkImageView getImageView() const
830     {
831         return *m_imageView;
832     }
833 
getSampler__anon39bd43f10111::Image834     VkSampler getSampler() const
835     {
836         return *m_sampler;
837     }
838 
839 private:
840     Move<VkImage> m_image;
841     Move<VkImageView> m_imageView;
842     Move<VkSampler> m_sampler;
843 };
844 } // namespace
845 
getStagesCount(const VkShaderStageFlags shaderStages)846 uint32_t vkt::subgroups::getStagesCount(const VkShaderStageFlags shaderStages)
847 {
848     const uint32_t stageCount = isAllGraphicsStages(shaderStages) ? 4 :
849                                 isAllComputeStages(shaderStages)  ? 1
850 #ifndef CTS_USES_VULKANSC
851                                 :
852                                 isAllRayTracingStages(shaderStages)  ? 6 :
853                                 isAllMeshShadingStages(shaderStages) ? 1
854 #endif // CTS_USES_VULKANSC
855                                                                        :
856                                                                        0;
857 
858     DE_ASSERT(stageCount != 0);
859 
860     return stageCount;
861 }
862 
getSharedMemoryBallotHelper()863 std::string vkt::subgroups::getSharedMemoryBallotHelper()
864 {
865     return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
866            "gl_WorkGroupSize.z];\n"
867            "uvec4 sharedMemoryBallot(bool vote)\n"
868            "{\n"
869            "  uint groupOffset = gl_SubgroupID;\n"
870            "  // One invocation in the group 0's the whole group's data\n"
871            "  if (subgroupElect())\n"
872            "  {\n"
873            "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
874            "  }\n"
875            "  subgroupMemoryBarrierShared();\n"
876            "  if (vote)\n"
877            "  {\n"
878            "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
879            "    const highp uint bitToSet = 1u << invocationId;\n"
880            "    switch (gl_SubgroupInvocationID / 32)\n"
881            "    {\n"
882            "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
883            "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
884            "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
885            "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
886            "    }\n"
887            "  }\n"
888            "  subgroupMemoryBarrierShared();\n"
889            "  return superSecretComputeShaderHelper[groupOffset];\n"
890            "}\n";
891 }
892 
getSharedMemoryBallotHelperARB()893 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
894 {
895     return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
896            "gl_WorkGroupSize.z];\n"
897            "uint64_t sharedMemoryBallot(bool vote)\n"
898            "{\n"
899            "  uint groupOffset = gl_SubgroupID;\n"
900            "  // One invocation in the group 0's the whole group's data\n"
901            "  if (subgroupElect())\n"
902            "  {\n"
903            "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
904            "  }\n"
905            "  subgroupMemoryBarrierShared();\n"
906            "  if (vote)\n"
907            "  {\n"
908            "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
909            "    const highp uint bitToSet = 1u << invocationId;\n"
910            "    switch (gl_SubgroupInvocationID / 32)\n"
911            "    {\n"
912            "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
913            "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
914            "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
915            "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
916            "    }\n"
917            "  }\n"
918            "  subgroupMemoryBarrierShared();\n"
919            "  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
920            "}\n";
921 }
922 
getSubgroupSize(Context & context)923 uint32_t vkt::subgroups::getSubgroupSize(Context &context)
924 {
925     return context.getSubgroupProperties().subgroupSize;
926 }
927 
maxSupportedSubgroupSize()928 uint32_t vkt::subgroups::maxSupportedSubgroupSize()
929 {
930     return 128u;
931 }
932 
getShaderStageName(VkShaderStageFlags stage)933 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
934 {
935     switch (stage)
936     {
937     case VK_SHADER_STAGE_COMPUTE_BIT:
938         return "compute";
939     case VK_SHADER_STAGE_FRAGMENT_BIT:
940         return "fragment";
941     case VK_SHADER_STAGE_VERTEX_BIT:
942         return "vertex";
943     case VK_SHADER_STAGE_GEOMETRY_BIT:
944         return "geometry";
945     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
946         return "tess_control";
947     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
948         return "tess_eval";
949 #ifndef CTS_USES_VULKANSC
950     case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
951         return "rgen";
952     case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
953         return "ahit";
954     case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
955         return "chit";
956     case VK_SHADER_STAGE_MISS_BIT_KHR:
957         return "miss";
958     case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
959         return "sect";
960     case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
961         return "call";
962     case VK_SHADER_STAGE_MESH_BIT_EXT:
963         return "mesh";
964     case VK_SHADER_STAGE_TASK_BIT_EXT:
965         return "task";
966 #endif // CTS_USES_VULKANSC
967     default:
968         TCU_THROW(InternalError, "Unhandled stage");
969     }
970 }
971 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)972 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
973 {
974     switch (bit)
975     {
976     case VK_SUBGROUP_FEATURE_BASIC_BIT:
977         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
978     case VK_SUBGROUP_FEATURE_VOTE_BIT:
979         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
980     case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
981         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
982     case VK_SUBGROUP_FEATURE_BALLOT_BIT:
983         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
984     case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
985         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
986     case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
987         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
988     case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
989         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
990     case VK_SUBGROUP_FEATURE_QUAD_BIT:
991         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
992     default:
993         TCU_THROW(InternalError, "Unknown subgroup feature category");
994     }
995 }
996 
addNoSubgroupShader(SourceCollections & programCollection)997 void vkt::subgroups::addNoSubgroupShader(SourceCollections &programCollection)
998 {
999     {
1000         /*
1001             "#version 450\n"
1002             "void main (void)\n"
1003             "{\n"
1004             "  float pixelSize = 2.0f/1024.0f;\n"
1005             "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1006             "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1007             "  gl_PointSize = 1.0f;\n"
1008             "}\n"
1009         */
1010         const std::string vertNoSubgroup = "; SPIR-V\n"
1011                                            "; Version: 1.3\n"
1012                                            "; Generator: Khronos Glslang Reference Front End; 1\n"
1013                                            "; Bound: 37\n"
1014                                            "; Schema: 0\n"
1015                                            "OpCapability Shader\n"
1016                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1017                                            "OpMemoryModel Logical GLSL450\n"
1018                                            "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1019                                            "OpMemberDecorate %20 0 BuiltIn Position\n"
1020                                            "OpMemberDecorate %20 1 BuiltIn PointSize\n"
1021                                            "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1022                                            "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1023                                            "OpDecorate %20 Block\n"
1024                                            "OpDecorate %26 BuiltIn VertexIndex\n"
1025                                            "%2 = OpTypeVoid\n"
1026                                            "%3 = OpTypeFunction %2\n"
1027                                            "%6 = OpTypeFloat 32\n"
1028                                            "%7 = OpTypePointer Function %6\n"
1029                                            "%9 = OpConstant %6 0.00195313\n"
1030                                            "%12 = OpConstant %6 2\n"
1031                                            "%14 = OpConstant %6 1\n"
1032                                            "%16 = OpTypeVector %6 4\n"
1033                                            "%17 = OpTypeInt 32 0\n"
1034                                            "%18 = OpConstant %17 1\n"
1035                                            "%19 = OpTypeArray %6 %18\n"
1036                                            "%20 = OpTypeStruct %16 %6 %19 %19\n"
1037                                            "%21 = OpTypePointer Output %20\n"
1038                                            "%22 = OpVariable %21 Output\n"
1039                                            "%23 = OpTypeInt 32 1\n"
1040                                            "%24 = OpConstant %23 0\n"
1041                                            "%25 = OpTypePointer Input %23\n"
1042                                            "%26 = OpVariable %25 Input\n"
1043                                            "%33 = OpConstant %6 0\n"
1044                                            "%35 = OpTypePointer Output %16\n"
1045                                            "%37 = OpConstant %23 1\n"
1046                                            "%38 = OpTypePointer Output %6\n"
1047                                            "%4 = OpFunction %2 None %3\n"
1048                                            "%5 = OpLabel\n"
1049                                            "%8 = OpVariable %7 Function\n"
1050                                            "%10 = OpVariable %7 Function\n"
1051                                            "OpStore %8 %9\n"
1052                                            "%11 = OpLoad %6 %8\n"
1053                                            "%13 = OpFDiv %6 %11 %12\n"
1054                                            "%15 = OpFSub %6 %13 %14\n"
1055                                            "OpStore %10 %15\n"
1056                                            "%27 = OpLoad %23 %26\n"
1057                                            "%28 = OpConvertSToF %6 %27\n"
1058                                            "%29 = OpLoad %6 %8\n"
1059                                            "%30 = OpFMul %6 %28 %29\n"
1060                                            "%31 = OpLoad %6 %10\n"
1061                                            "%32 = OpFAdd %6 %30 %31\n"
1062                                            "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1063                                            "%36 = OpAccessChain %35 %22 %24\n"
1064                                            "OpStore %36 %34\n"
1065                                            "%39 = OpAccessChain %38 %22 %37\n"
1066                                            "OpStore %39 %14\n"
1067                                            "OpReturn\n"
1068                                            "OpFunctionEnd\n";
1069         programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1070     }
1071 
1072     {
1073         /*
1074             "#version 450\n"
1075             "layout(vertices=1) out;\n"
1076             "\n"
1077             "void main (void)\n"
1078             "{\n"
1079             "  if (gl_InvocationID == 0)\n"
1080             "  {\n"
1081             "    gl_TessLevelOuter[0] = 1.0f;\n"
1082             "    gl_TessLevelOuter[1] = 1.0f;\n"
1083             "  }\n"
1084             "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1085             "}\n"
1086         */
1087         const std::string tescNoSubgroup = "; SPIR-V\n"
1088                                            "; Version: 1.3\n"
1089                                            "; Generator: Khronos Glslang Reference Front End; 1\n"
1090                                            "; Bound: 45\n"
1091                                            "; Schema: 0\n"
1092                                            "OpCapability Tessellation\n"
1093                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1094                                            "OpMemoryModel Logical GLSL450\n"
1095                                            "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1096                                            "OpExecutionMode %4 OutputVertices 1\n"
1097                                            "OpDecorate %8 BuiltIn InvocationId\n"
1098                                            "OpDecorate %20 Patch\n"
1099                                            "OpDecorate %20 BuiltIn TessLevelOuter\n"
1100                                            "OpMemberDecorate %29 0 BuiltIn Position\n"
1101                                            "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1102                                            "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1103                                            "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1104                                            "OpDecorate %29 Block\n"
1105                                            "OpMemberDecorate %34 0 BuiltIn Position\n"
1106                                            "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1107                                            "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1108                                            "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1109                                            "OpDecorate %34 Block\n"
1110                                            "%2 = OpTypeVoid\n"
1111                                            "%3 = OpTypeFunction %2\n"
1112                                            "%6 = OpTypeInt 32 1\n"
1113                                            "%7 = OpTypePointer Input %6\n"
1114                                            "%8 = OpVariable %7 Input\n"
1115                                            "%10 = OpConstant %6 0\n"
1116                                            "%11 = OpTypeBool\n"
1117                                            "%15 = OpTypeFloat 32\n"
1118                                            "%16 = OpTypeInt 32 0\n"
1119                                            "%17 = OpConstant %16 4\n"
1120                                            "%18 = OpTypeArray %15 %17\n"
1121                                            "%19 = OpTypePointer Output %18\n"
1122                                            "%20 = OpVariable %19 Output\n"
1123                                            "%21 = OpConstant %15 1\n"
1124                                            "%22 = OpTypePointer Output %15\n"
1125                                            "%24 = OpConstant %6 1\n"
1126                                            "%26 = OpTypeVector %15 4\n"
1127                                            "%27 = OpConstant %16 1\n"
1128                                            "%28 = OpTypeArray %15 %27\n"
1129                                            "%29 = OpTypeStruct %26 %15 %28 %28\n"
1130                                            "%30 = OpTypeArray %29 %27\n"
1131                                            "%31 = OpTypePointer Output %30\n"
1132                                            "%32 = OpVariable %31 Output\n"
1133                                            "%34 = OpTypeStruct %26 %15 %28 %28\n"
1134                                            "%35 = OpConstant %16 32\n"
1135                                            "%36 = OpTypeArray %34 %35\n"
1136                                            "%37 = OpTypePointer Input %36\n"
1137                                            "%38 = OpVariable %37 Input\n"
1138                                            "%40 = OpTypePointer Input %26\n"
1139                                            "%43 = OpTypePointer Output %26\n"
1140                                            "%4 = OpFunction %2 None %3\n"
1141                                            "%5 = OpLabel\n"
1142                                            "%9 = OpLoad %6 %8\n"
1143                                            "%12 = OpIEqual %11 %9 %10\n"
1144                                            "OpSelectionMerge %14 None\n"
1145                                            "OpBranchConditional %12 %13 %14\n"
1146                                            "%13 = OpLabel\n"
1147                                            "%23 = OpAccessChain %22 %20 %10\n"
1148                                            "OpStore %23 %21\n"
1149                                            "%25 = OpAccessChain %22 %20 %24\n"
1150                                            "OpStore %25 %21\n"
1151                                            "OpBranch %14\n"
1152                                            "%14 = OpLabel\n"
1153                                            "%33 = OpLoad %6 %8\n"
1154                                            "%39 = OpLoad %6 %8\n"
1155                                            "%41 = OpAccessChain %40 %38 %39 %10\n"
1156                                            "%42 = OpLoad %26 %41\n"
1157                                            "%44 = OpAccessChain %43 %32 %33 %10\n"
1158                                            "OpStore %44 %42\n"
1159                                            "OpReturn\n"
1160                                            "OpFunctionEnd\n";
1161         programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1162     }
1163 
1164     {
1165         /*
1166             "#version 450\n"
1167             "layout(isolines) in;\n"
1168             "\n"
1169             "void main (void)\n"
1170             "{\n"
1171             "  float pixelSize = 2.0f/1024.0f;\n"
1172             "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1173             "}\n";
1174         */
1175         const std::string teseNoSubgroup = "; SPIR-V\n"
1176                                            "; Version: 1.3\n"
1177                                            "; Generator: Khronos Glslang Reference Front End; 2\n"
1178                                            "; Bound: 42\n"
1179                                            "; Schema: 0\n"
1180                                            "OpCapability Tessellation\n"
1181                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1182                                            "OpMemoryModel Logical GLSL450\n"
1183                                            "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1184                                            "OpExecutionMode %4 Isolines\n"
1185                                            "OpExecutionMode %4 SpacingEqual\n"
1186                                            "OpExecutionMode %4 VertexOrderCcw\n"
1187                                            "OpMemberDecorate %14 0 BuiltIn Position\n"
1188                                            "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1189                                            "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1190                                            "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1191                                            "OpDecorate %14 Block\n"
1192                                            "OpMemberDecorate %19 0 BuiltIn Position\n"
1193                                            "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1194                                            "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1195                                            "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1196                                            "OpDecorate %19 Block\n"
1197                                            "OpDecorate %29 BuiltIn TessCoord\n"
1198                                            "%2 = OpTypeVoid\n"
1199                                            "%3 = OpTypeFunction %2\n"
1200                                            "%6 = OpTypeFloat 32\n"
1201                                            "%7 = OpTypePointer Function %6\n"
1202                                            "%9 = OpConstant %6 0.00195313\n"
1203                                            "%10 = OpTypeVector %6 4\n"
1204                                            "%11 = OpTypeInt 32 0\n"
1205                                            "%12 = OpConstant %11 1\n"
1206                                            "%13 = OpTypeArray %6 %12\n"
1207                                            "%14 = OpTypeStruct %10 %6 %13 %13\n"
1208                                            "%15 = OpTypePointer Output %14\n"
1209                                            "%16 = OpVariable %15 Output\n"
1210                                            "%17 = OpTypeInt 32 1\n"
1211                                            "%18 = OpConstant %17 0\n"
1212                                            "%19 = OpTypeStruct %10 %6 %13 %13\n"
1213                                            "%20 = OpConstant %11 32\n"
1214                                            "%21 = OpTypeArray %19 %20\n"
1215                                            "%22 = OpTypePointer Input %21\n"
1216                                            "%23 = OpVariable %22 Input\n"
1217                                            "%24 = OpTypePointer Input %10\n"
1218                                            "%27 = OpTypeVector %6 3\n"
1219                                            "%28 = OpTypePointer Input %27\n"
1220                                            "%29 = OpVariable %28 Input\n"
1221                                            "%30 = OpConstant %11 0\n"
1222                                            "%31 = OpTypePointer Input %6\n"
1223                                            "%36 = OpConstant %6 2\n"
1224                                            "%40 = OpTypePointer Output %10\n"
1225                                            "%4 = OpFunction %2 None %3\n"
1226                                            "%5 = OpLabel\n"
1227                                            "%8 = OpVariable %7 Function\n"
1228                                            "OpStore %8 %9\n"
1229                                            "%25 = OpAccessChain %24 %23 %18 %18\n"
1230                                            "%26 = OpLoad %10 %25\n"
1231                                            "%32 = OpAccessChain %31 %29 %30\n"
1232                                            "%33 = OpLoad %6 %32\n"
1233                                            "%34 = OpLoad %6 %8\n"
1234                                            "%35 = OpFMul %6 %33 %34\n"
1235                                            "%37 = OpFDiv %6 %35 %36\n"
1236                                            "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1237                                            "%39 = OpFAdd %10 %26 %38\n"
1238                                            "%41 = OpAccessChain %40 %16 %18\n"
1239                                            "OpStore %41 %39\n"
1240                                            "OpReturn\n"
1241                                            "OpFunctionEnd\n";
1242         programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1243     }
1244 }
1245 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const uint32_t stage)1246 static std::string getFramebufferBufferDeclarations(const VkFormat &format,
1247                                                     const std::vector<std::string> &declarations, const uint32_t stage)
1248 {
1249     if (declarations.empty())
1250     {
1251         const std::string name   = (stage == 0) ? "result" : "out_color";
1252         const std::string suffix = (stage == 2) ? "[]" : "";
1253         const std::string result = "layout(location = 0) out float " + name + suffix +
1254                                    ";\n"
1255                                    "layout(set = 0, binding = 0) uniform Buffer1\n"
1256                                    "{\n"
1257                                    "  " +
1258                                    de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" +
1259                                    de::toString(subgroups::maxSupportedSubgroupSize()) +
1260                                    "];\n"
1261                                    "};\n";
1262 
1263         return result;
1264     }
1265     else
1266     {
1267         return declarations[stage];
1268     }
1269 }
1270 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1271 void vkt::subgroups::initStdFrameBufferPrograms(SourceCollections &programCollection,
1272                                                 const vk::ShaderBuildOptions &buildOptions,
1273                                                 VkShaderStageFlags shaderStage, VkFormat format, bool gsPointSize,
1274                                                 const std::string &extHeader, const std::string &testSrc,
1275                                                 const std::string &helperStr,
1276                                                 const std::vector<std::string> &declarations)
1277 {
1278     subgroups::setFragmentShaderFrameBuffer(programCollection);
1279 
1280     if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1281         subgroups::setVertexShaderFrameBuffer(programCollection);
1282 
1283     if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1284     {
1285         std::ostringstream vertex;
1286 
1287         vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1288                << extHeader << "layout(location = 0) in highp vec4 in_position;\n"
1289                << getFramebufferBufferDeclarations(format, declarations, 0) << "\n"
1290                << helperStr << "void main (void)\n"
1291                << "{\n"
1292                << "  uint tempRes;\n"
1293                << testSrc << "  result = float(tempRes);\n"
1294                << "  gl_Position = in_position;\n"
1295                << "  gl_PointSize = 1.0f;\n"
1296                << "}\n";
1297 
1298         programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1299     }
1300     else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1301     {
1302         std::ostringstream geometry;
1303 
1304         geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1305                  << extHeader << "layout(points) in;\n"
1306                  << "layout(points, max_vertices = 1) out;\n"
1307                  << getFramebufferBufferDeclarations(format, declarations, 1) << "\n"
1308                  << helperStr << "void main (void)\n"
1309                  << "{\n"
1310                  << "  uint tempRes;\n"
1311                  << testSrc << "  out_color = float(tempRes);\n"
1312                  << "  gl_Position = gl_in[0].gl_Position;\n"
1313                  << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "  EmitVertex();\n"
1314                  << "  EndPrimitive();\n"
1315                  << "}\n";
1316 
1317         programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1318     }
1319     else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1320     {
1321         std::ostringstream controlSource;
1322 
1323         controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1324                       << extHeader << "layout(vertices = 2) out;\n"
1325                       << getFramebufferBufferDeclarations(format, declarations, 2) << "\n"
1326                       << helperStr << "void main (void)\n"
1327                       << "{\n"
1328                       << "  if (gl_InvocationID == 0)\n"
1329                       << "  {\n"
1330                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
1331                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
1332                       << "  }\n"
1333                       << "  uint tempRes;\n"
1334                       << testSrc << "  out_color[gl_InvocationID] = float(tempRes);\n"
1335                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1336                       << (gsPointSize ?
1337                               "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
1338                               "")
1339                       << "}\n";
1340 
1341         programCollection.glslSources.add("tesc")
1342             << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1343         subgroups::setTesEvalShaderFrameBuffer(programCollection);
1344     }
1345     else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1346     {
1347         ostringstream evaluationSource;
1348 
1349         evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1350                          << extHeader << "layout(isolines, equal_spacing, ccw ) in;\n"
1351                          << getFramebufferBufferDeclarations(format, declarations, 3) << "\n"
1352                          << helperStr << "void main (void)\n"
1353                          << "{\n"
1354                          << "  uint tempRes;\n"
1355                          << testSrc << "  out_color = float(tempRes);\n"
1356                          << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1357                          << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "}\n";
1358 
1359         subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1360         programCollection.glslSources.add("tese")
1361             << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1362     }
1363     else
1364     {
1365         DE_FATAL("Unsupported shader stage");
1366     }
1367 }
1368 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const uint32_t stage)1369 static std::string getBufferDeclarations(vk::VkShaderStageFlags shaderStage, const std::string &formatName,
1370                                          const std::vector<std::string> &declarations, const uint32_t stage)
1371 {
1372     if (declarations.empty())
1373     {
1374         const uint32_t stageCount = vkt::subgroups::getStagesCount(shaderStage);
1375         const uint32_t binding0   = stage;
1376         const uint32_t binding1   = stageCount;
1377         const bool fragment       = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1378         const string buffer1      = fragment ? "layout(location = 0) out uint result;\n" :
1379                                                "layout(set = 0, binding = " + de::toString(binding0) +
1380                                               ", std430) buffer Buffer1\n"
1381                                                    "{\n"
1382                                                    "  uint result[];\n"
1383                                                    "};\n";
1384         //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1385         const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" +
1386                                (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") +
1387                                "\n"
1388                                "{\n"
1389                                "  " +
1390                                formatName +
1391                                " data[];\n"
1392                                "};\n";
1393 
1394         return buffer1 + buffer2;
1395     }
1396     else
1397     {
1398         return declarations[stage];
1399     }
1400 }
1401 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1402 void vkt::subgroups::initStdPrograms(vk::SourceCollections &programCollection,
1403                                      const vk::ShaderBuildOptions &buildOptions, vk::VkShaderStageFlags shaderStage,
1404                                      vk::VkFormat format, bool gsPointSize, const std::string &extHeader,
1405                                      const std::string &testSrc, const std::string &helperStr,
1406                                      const std::vector<std::string> &declarations, const bool avoidHelperInvocations,
1407                                      const std::string &tempRes)
1408 {
1409     const std::string formatName = subgroups::getFormatNameForGLSL(format);
1410 
1411     if (isAllComputeStages(shaderStage))
1412     {
1413         std::ostringstream src;
1414 
1415         src << "#version 450\n"
1416             << extHeader
1417             << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1418                "local_size_z_id = 2) in;\n"
1419             << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1420             << helperStr << "void main (void)\n"
1421             << "{\n"
1422             << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1423             << "  highp uint offset = globalSize.x * ((globalSize.y * "
1424                "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1425                "gl_GlobalInvocationID.x;\n"
1426             << tempRes << testSrc << "  result[offset] = tempRes;\n"
1427             << "}\n";
1428 
1429         programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1430     }
1431 #ifndef CTS_USES_VULKANSC
1432     else if (isAllMeshShadingStages(shaderStage))
1433     {
1434         const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1435         const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1436 
1437         if (testMesh)
1438         {
1439             std::ostringstream mesh;
1440 
1441             mesh << "#version 450\n"
1442                  << "#extension GL_EXT_mesh_shader : enable\n"
1443                  << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1444                  << "layout (points) out;\n"
1445                  << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1446                  << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1447                  << helperStr << "void main (void)\n"
1448                  << "{\n"
1449                  << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1450                  << "  highp uint offset = globalSize.x * ((globalSize.y * "
1451                     "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1452                     "gl_GlobalInvocationID.x;\n"
1453                  << tempRes << testSrc << "  result[offset] = tempRes;\n"
1454                  << "  SetMeshOutputsEXT(0u, 0u);\n"
1455                  << "}\n";
1456 
1457             programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1458         }
1459         else
1460         {
1461             const std::string meshShaderNoSubgroups =
1462                 "#version 450\n"
1463                 "#extension GL_EXT_mesh_shader : enable\n"
1464                 "\n"
1465                 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1466                 "layout (points) out;\n"
1467                 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1468                 "\n"
1469                 "void main (void)\n"
1470                 "{\n"
1471                 "  SetMeshOutputsEXT(0u, 0u);\n"
1472                 "}\n";
1473             programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1474         }
1475 
1476         if (testTask)
1477         {
1478             const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1479             std::ostringstream task;
1480 
1481             task << "#version 450\n"
1482                  << "#extension GL_EXT_mesh_shader : enable\n"
1483                  //<< "#extension GL_NV_mesh_shader : enable\n"
1484                  << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1485                  << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1486                  << helperStr << "void main (void)\n"
1487                  << "{\n"
1488                  << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1489                  //<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1490                  << "  highp uint offset = globalSize.x * ((globalSize.y * "
1491                     "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1492                     "gl_GlobalInvocationID.x;\n"
1493                  << tempRes << testSrc << "  result[offset] = tempRes;\n"
1494                  << "  EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z()
1495                  << ");\n"
1496                  //<< "  gl_TaskCountNV = " << emitSize.x() << ";\n"
1497                  << "}\n";
1498 
1499             programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1500         }
1501     }
1502 #endif // CTS_USES_VULKANSC
1503     else if (isAllGraphicsStages(shaderStage))
1504     {
1505         const string vertex =
1506             "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" +
1507             helperStr +
1508             "void main (void)\n"
1509             "{\n"
1510             "  uint tempRes;\n" +
1511             testSrc +
1512             "  result[gl_VertexIndex] = tempRes;\n"
1513             "  float pixelSize = 2.0f/1024.0f;\n"
1514             "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1515             "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1516             "  gl_PointSize = 1.0f;\n"
1517             "}\n";
1518 
1519         const string tesc =
1520             "#version 450\n" + extHeader + "layout(vertices=1) out;\n" +
1521             getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" + helperStr +
1522             "void main (void)\n"
1523             "{\n" +
1524             tempRes + testSrc +
1525             "  result[gl_PrimitiveID] = tempRes;\n"
1526             "  if (gl_InvocationID == 0)\n"
1527             "  {\n"
1528             "    gl_TessLevelOuter[0] = 1.0f;\n"
1529             "    gl_TessLevelOuter[1] = 1.0f;\n"
1530             "  }\n"
1531             "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" +
1532             (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1533             "}\n";
1534 
1535         const string tese = "#version 450\n" + extHeader + "layout(isolines) in;\n" +
1536                             getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" + helperStr +
1537                             "void main (void)\n"
1538                             "{\n" +
1539                             tempRes + testSrc +
1540                             "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1541                             "  float pixelSize = 2.0f/1024.0f;\n"
1542                             "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" +
1543                             (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") + "}\n";
1544 
1545         const string geometry = "#version 450\n" + extHeader +
1546                                 "layout(${TOPOLOGY}) in;\n"
1547                                 "layout(points, max_vertices = 1) out;\n" +
1548                                 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" + helperStr +
1549                                 "void main (void)\n"
1550                                 "{\n" +
1551                                 tempRes + testSrc +
1552                                 "  result[gl_PrimitiveIDIn] = tempRes;\n"
1553                                 "  gl_Position = gl_in[0].gl_Position;\n" +
1554                                 (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1555                                 "  EmitVertex();\n"
1556                                 "  EndPrimitive();\n"
1557                                 "}\n";
1558 
1559         const string fragment =
1560             "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 4) + helperStr +
1561             "void main (void)\n"
1562             "{\n" +
1563             (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "") + tempRes + testSrc +
1564             "  result = tempRes;\n"
1565             "}\n";
1566 
1567         subgroups::addNoSubgroupShader(programCollection);
1568 
1569         programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1570         programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1571         programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1572         subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1573         programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment) << buildOptions;
1574     }
1575 #ifndef CTS_USES_VULKANSC
1576     else if (isAllRayTracingStages(shaderStage))
1577     {
1578         const std::string rgenShader =
1579             "#version 460 core\n"
1580             "#extension GL_EXT_ray_tracing: require\n" +
1581             extHeader +
1582             "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1583             "layout(location = 0) callableDataEXT uvec4 callData;"
1584             "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n" +
1585             getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" + helperStr +
1586             "void main()\n"
1587             "{\n" +
1588             tempRes + testSrc +
1589             "  uint  rayFlags   = 0;\n"
1590             "  uint  cullMask   = 0xFF;\n"
1591             "  float tmin       = 0.0;\n"
1592             "  float tmax       = 9.0;\n"
1593             "  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
1594             "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1595             "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1596             "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1597             "\n"
1598             "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1599             "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1600             "  executeCallableEXT(0, 0);"
1601             "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1602             "}\n";
1603         const std::string ahitShader = "#version 460 core\n"
1604                                        "#extension GL_EXT_ray_tracing: require\n" +
1605                                        extHeader +
1606                                        "hitAttributeEXT vec3 attribs;\n"
1607                                        "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1608                                        getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" +
1609                                        helperStr +
1610                                        "void main()\n"
1611                                        "{\n" +
1612                                        tempRes + testSrc +
1613                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1614                                        "}\n";
1615         const std::string chitShader = "#version 460 core\n"
1616                                        "#extension GL_EXT_ray_tracing: require\n" +
1617                                        extHeader +
1618                                        "hitAttributeEXT vec3 attribs;\n"
1619                                        "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1620                                        getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" +
1621                                        helperStr +
1622                                        "void main()\n"
1623                                        "{\n" +
1624                                        tempRes + testSrc +
1625                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1626                                        "}\n";
1627         const std::string missShader = "#version 460 core\n"
1628                                        "#extension GL_EXT_ray_tracing: require\n" +
1629                                        extHeader + "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1630                                        getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" +
1631                                        helperStr +
1632                                        "void main()\n"
1633                                        "{\n" +
1634                                        tempRes + testSrc +
1635                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1636                                        "}\n";
1637         const std::string sectShader = "#version 460 core\n"
1638                                        "#extension GL_EXT_ray_tracing: require\n" +
1639                                        extHeader + "hitAttributeEXT vec3 hitAttribute;\n" +
1640                                        getBufferDeclarations(shaderStage, formatName, declarations, 4) + "\n" +
1641                                        helperStr +
1642                                        "void main()\n"
1643                                        "{\n" +
1644                                        tempRes + testSrc +
1645                                        "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
1646                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1647                                        "}\n";
1648         const std::string callShader = "#version 460 core\n"
1649                                        "#extension GL_EXT_ray_tracing: require\n" +
1650                                        extHeader + "layout(location = 0) callableDataInEXT float callData;\n" +
1651                                        getBufferDeclarations(shaderStage, formatName, declarations, 5) + "\n" +
1652                                        helperStr +
1653                                        "void main()\n"
1654                                        "{\n" +
1655                                        tempRes + testSrc +
1656                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1657                                        "}\n";
1658 
1659         programCollection.glslSources.add("rgen") << glu::RaygenSource(rgenShader) << buildOptions;
1660         programCollection.glslSources.add("ahit") << glu::AnyHitSource(ahitShader) << buildOptions;
1661         programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitShader) << buildOptions;
1662         programCollection.glslSources.add("miss") << glu::MissSource(missShader) << buildOptions;
1663         programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1664         programCollection.glslSources.add("call") << glu::CallableSource(callShader) << buildOptions;
1665 
1666         subgroups::addRayTracingNoSubgroupShader(programCollection);
1667     }
1668 #endif // CTS_USES_VULKANSC
1669     else
1670         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1671 }
1672 
isSubgroupSupported(Context & context)1673 bool vkt::subgroups::isSubgroupSupported(Context &context)
1674 {
1675     return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1676 }
1677 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1678 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(Context &context, const VkShaderStageFlags stage)
1679 {
1680     return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1681 }
1682 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1683 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(Context &context, VkSubgroupFeatureFlagBits bit)
1684 {
1685     return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1686 }
1687 
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1688 bool vkt::subgroups::areQuadOperationsSupportedForStages(Context &context, const VkShaderStageFlags stages)
1689 {
1690     // Check general quad feature support first.
1691     if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1692         return false;
1693 
1694     if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1695         return true; // No problem, any stage works.
1696 
1697     // Only frag and compute are supported.
1698     const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1699     const VkShaderStageFlags otherStages = ~fragCompute;
1700     return ((stages & otherStages) == 0u);
1701 }
1702 
isFragmentSSBOSupportedForDevice(Context & context)1703 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context &context)
1704 {
1705     return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1706 }
1707 
isVertexSSBOSupportedForDevice(Context & context)1708 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context &context)
1709 {
1710     return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1711 }
1712 
isInt64SupportedForDevice(Context & context)1713 bool vkt::subgroups::isInt64SupportedForDevice(Context &context)
1714 {
1715     return context.getDeviceFeatures().shaderInt64 ? true : false;
1716 }
1717 
isTessellationAndGeometryPointSizeSupported(Context & context)1718 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported(Context &context)
1719 {
1720     return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1721 }
1722 
is16BitUBOStorageSupported(Context & context)1723 bool vkt::subgroups::is16BitUBOStorageSupported(Context &context)
1724 {
1725     return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1726 }
1727 
is8BitUBOStorageSupported(Context & context)1728 bool vkt::subgroups::is8BitUBOStorageSupported(Context &context)
1729 {
1730     return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1731 }
1732 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1733 bool vkt::subgroups::isFormatSupportedForDevice(Context &context, vk::VkFormat format)
1734 {
1735     const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures &subgroupExtendedTypesFeatures =
1736         context.getShaderSubgroupExtendedTypesFeatures();
1737     const VkPhysicalDeviceShaderFloat16Int8Features &float16Int8Features = context.getShaderFloat16Int8Features();
1738     const VkPhysicalDevice16BitStorageFeatures &storage16bit             = context.get16BitStorageFeatures();
1739     const VkPhysicalDevice8BitStorageFeatures &storage8bit               = context.get8BitStorageFeatures();
1740     const VkPhysicalDeviceFeatures &features                             = context.getDeviceFeatures();
1741     bool shaderFloat64                                                   = features.shaderFloat64 ? true : false;
1742     bool shaderInt16                                                     = features.shaderInt16 ? true : false;
1743     bool shaderInt64                                                     = features.shaderInt64 ? true : false;
1744     bool shaderSubgroupExtendedTypes                                     = false;
1745     bool shaderFloat16                                                   = false;
1746     bool shaderInt8                                                      = false;
1747     bool storageBuffer16BitAccess                                        = false;
1748     bool storageBuffer8BitAccess                                         = false;
1749 
1750     if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1751         context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1752     {
1753         shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1754         shaderFloat16               = float16Int8Features.shaderFloat16 ? true : false;
1755         shaderInt8                  = float16Int8Features.shaderInt8 ? true : false;
1756 
1757         if (context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage"))
1758             storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1759 
1760         if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1761             storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1762     }
1763 
1764     switch (format)
1765     {
1766     default:
1767         return true;
1768     case VK_FORMAT_R16_SFLOAT:
1769     case VK_FORMAT_R16G16_SFLOAT:
1770     case VK_FORMAT_R16G16B16_SFLOAT:
1771     case VK_FORMAT_R16G16B16A16_SFLOAT:
1772         return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1773     case VK_FORMAT_R64_SFLOAT:
1774     case VK_FORMAT_R64G64_SFLOAT:
1775     case VK_FORMAT_R64G64B64_SFLOAT:
1776     case VK_FORMAT_R64G64B64A64_SFLOAT:
1777         return shaderFloat64;
1778     case VK_FORMAT_R8_SINT:
1779     case VK_FORMAT_R8G8_SINT:
1780     case VK_FORMAT_R8G8B8_SINT:
1781     case VK_FORMAT_R8G8B8A8_SINT:
1782     case VK_FORMAT_R8_UINT:
1783     case VK_FORMAT_R8G8_UINT:
1784     case VK_FORMAT_R8G8B8_UINT:
1785     case VK_FORMAT_R8G8B8A8_UINT:
1786         return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1787     case VK_FORMAT_R16_SINT:
1788     case VK_FORMAT_R16G16_SINT:
1789     case VK_FORMAT_R16G16B16_SINT:
1790     case VK_FORMAT_R16G16B16A16_SINT:
1791     case VK_FORMAT_R16_UINT:
1792     case VK_FORMAT_R16G16_UINT:
1793     case VK_FORMAT_R16G16B16_UINT:
1794     case VK_FORMAT_R16G16B16A16_UINT:
1795         return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1796     case VK_FORMAT_R64_SINT:
1797     case VK_FORMAT_R64G64_SINT:
1798     case VK_FORMAT_R64G64B64_SINT:
1799     case VK_FORMAT_R64G64B64A64_SINT:
1800     case VK_FORMAT_R64_UINT:
1801     case VK_FORMAT_R64G64_UINT:
1802     case VK_FORMAT_R64G64B64_UINT:
1803     case VK_FORMAT_R64G64B64A64_UINT:
1804         return shaderSubgroupExtendedTypes && shaderInt64;
1805     }
1806 }
1807 
isSubgroupBroadcastDynamicIdSupported(Context & context)1808 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported(Context &context)
1809 {
1810     return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1811            vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice())
1812                .subgroupBroadcastDynamicId;
1813 }
1814 
isSubgroupRotateSpecVersionValid(Context & context)1815 bool vkt::subgroups::isSubgroupRotateSpecVersionValid(Context &context)
1816 {
1817     // Ensure "VK_KHR_shader_subgroup_rotate" extension's spec version is at least 2
1818     {
1819         const std::string extensionName = "VK_KHR_shader_subgroup_rotate";
1820         const std::vector<VkExtensionProperties> deviceExtensionProperties =
1821             enumerateDeviceExtensionProperties(context.getInstanceInterface(), context.getPhysicalDevice(), DE_NULL);
1822 
1823         for (const auto &property : deviceExtensionProperties)
1824         {
1825             if (property.extensionName == extensionName && property.specVersion < 2)
1826             {
1827                 return false;
1828             }
1829         }
1830     }
1831     return true;
1832 }
1833 
getFormatNameForGLSL(VkFormat format)1834 std::string vkt::subgroups::getFormatNameForGLSL(VkFormat format)
1835 {
1836     switch (format)
1837     {
1838     case VK_FORMAT_R8_SINT:
1839         return "int8_t";
1840     case VK_FORMAT_R8G8_SINT:
1841         return "i8vec2";
1842     case VK_FORMAT_R8G8B8_SINT:
1843         return "i8vec3";
1844     case VK_FORMAT_R8G8B8A8_SINT:
1845         return "i8vec4";
1846     case VK_FORMAT_R8_UINT:
1847         return "uint8_t";
1848     case VK_FORMAT_R8G8_UINT:
1849         return "u8vec2";
1850     case VK_FORMAT_R8G8B8_UINT:
1851         return "u8vec3";
1852     case VK_FORMAT_R8G8B8A8_UINT:
1853         return "u8vec4";
1854     case VK_FORMAT_R16_SINT:
1855         return "int16_t";
1856     case VK_FORMAT_R16G16_SINT:
1857         return "i16vec2";
1858     case VK_FORMAT_R16G16B16_SINT:
1859         return "i16vec3";
1860     case VK_FORMAT_R16G16B16A16_SINT:
1861         return "i16vec4";
1862     case VK_FORMAT_R16_UINT:
1863         return "uint16_t";
1864     case VK_FORMAT_R16G16_UINT:
1865         return "u16vec2";
1866     case VK_FORMAT_R16G16B16_UINT:
1867         return "u16vec3";
1868     case VK_FORMAT_R16G16B16A16_UINT:
1869         return "u16vec4";
1870     case VK_FORMAT_R32_SINT:
1871         return "int";
1872     case VK_FORMAT_R32G32_SINT:
1873         return "ivec2";
1874     case VK_FORMAT_R32G32B32_SINT:
1875         return "ivec3";
1876     case VK_FORMAT_R32G32B32A32_SINT:
1877         return "ivec4";
1878     case VK_FORMAT_R32_UINT:
1879         return "uint";
1880     case VK_FORMAT_R32G32_UINT:
1881         return "uvec2";
1882     case VK_FORMAT_R32G32B32_UINT:
1883         return "uvec3";
1884     case VK_FORMAT_R32G32B32A32_UINT:
1885         return "uvec4";
1886     case VK_FORMAT_R64_SINT:
1887         return "int64_t";
1888     case VK_FORMAT_R64G64_SINT:
1889         return "i64vec2";
1890     case VK_FORMAT_R64G64B64_SINT:
1891         return "i64vec3";
1892     case VK_FORMAT_R64G64B64A64_SINT:
1893         return "i64vec4";
1894     case VK_FORMAT_R64_UINT:
1895         return "uint64_t";
1896     case VK_FORMAT_R64G64_UINT:
1897         return "u64vec2";
1898     case VK_FORMAT_R64G64B64_UINT:
1899         return "u64vec3";
1900     case VK_FORMAT_R64G64B64A64_UINT:
1901         return "u64vec4";
1902     case VK_FORMAT_R16_SFLOAT:
1903         return "float16_t";
1904     case VK_FORMAT_R16G16_SFLOAT:
1905         return "f16vec2";
1906     case VK_FORMAT_R16G16B16_SFLOAT:
1907         return "f16vec3";
1908     case VK_FORMAT_R16G16B16A16_SFLOAT:
1909         return "f16vec4";
1910     case VK_FORMAT_R32_SFLOAT:
1911         return "float";
1912     case VK_FORMAT_R32G32_SFLOAT:
1913         return "vec2";
1914     case VK_FORMAT_R32G32B32_SFLOAT:
1915         return "vec3";
1916     case VK_FORMAT_R32G32B32A32_SFLOAT:
1917         return "vec4";
1918     case VK_FORMAT_R64_SFLOAT:
1919         return "double";
1920     case VK_FORMAT_R64G64_SFLOAT:
1921         return "dvec2";
1922     case VK_FORMAT_R64G64B64_SFLOAT:
1923         return "dvec3";
1924     case VK_FORMAT_R64G64B64A64_SFLOAT:
1925         return "dvec4";
1926     case VK_FORMAT_R8_USCALED:
1927         return "bool";
1928     case VK_FORMAT_R8G8_USCALED:
1929         return "bvec2";
1930     case VK_FORMAT_R8G8B8_USCALED:
1931         return "bvec3";
1932     case VK_FORMAT_R8G8B8A8_USCALED:
1933         return "bvec4";
1934     default:
1935         TCU_THROW(InternalError, "Unhandled format");
1936     }
1937 }
1938 
getAdditionalExtensionForFormat(vk::VkFormat format)1939 std::string vkt::subgroups::getAdditionalExtensionForFormat(vk::VkFormat format)
1940 {
1941     switch (format)
1942     {
1943     default:
1944         return "";
1945     case VK_FORMAT_R8_SINT:
1946     case VK_FORMAT_R8G8_SINT:
1947     case VK_FORMAT_R8G8B8_SINT:
1948     case VK_FORMAT_R8G8B8A8_SINT:
1949     case VK_FORMAT_R8_UINT:
1950     case VK_FORMAT_R8G8_UINT:
1951     case VK_FORMAT_R8G8B8_UINT:
1952     case VK_FORMAT_R8G8B8A8_UINT:
1953         return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1954     case VK_FORMAT_R16_SINT:
1955     case VK_FORMAT_R16G16_SINT:
1956     case VK_FORMAT_R16G16B16_SINT:
1957     case VK_FORMAT_R16G16B16A16_SINT:
1958     case VK_FORMAT_R16_UINT:
1959     case VK_FORMAT_R16G16_UINT:
1960     case VK_FORMAT_R16G16B16_UINT:
1961     case VK_FORMAT_R16G16B16A16_UINT:
1962         return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1963     case VK_FORMAT_R64_SINT:
1964     case VK_FORMAT_R64G64_SINT:
1965     case VK_FORMAT_R64G64B64_SINT:
1966     case VK_FORMAT_R64G64B64A64_SINT:
1967     case VK_FORMAT_R64_UINT:
1968     case VK_FORMAT_R64G64_UINT:
1969     case VK_FORMAT_R64G64B64_UINT:
1970     case VK_FORMAT_R64G64B64A64_UINT:
1971         return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1972     case VK_FORMAT_R16_SFLOAT:
1973     case VK_FORMAT_R16G16_SFLOAT:
1974     case VK_FORMAT_R16G16B16_SFLOAT:
1975     case VK_FORMAT_R16G16B16A16_SFLOAT:
1976         return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1977     }
1978 }
1979 
getAllFormats()1980 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1981 {
1982     std::vector<VkFormat> formats;
1983 
1984     formats.push_back(VK_FORMAT_R8_SINT);
1985     formats.push_back(VK_FORMAT_R8G8_SINT);
1986     formats.push_back(VK_FORMAT_R8G8B8_SINT);
1987     formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1988     formats.push_back(VK_FORMAT_R8_UINT);
1989     formats.push_back(VK_FORMAT_R8G8_UINT);
1990     formats.push_back(VK_FORMAT_R8G8B8_UINT);
1991     formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1992     formats.push_back(VK_FORMAT_R16_SINT);
1993     formats.push_back(VK_FORMAT_R16G16_SINT);
1994     formats.push_back(VK_FORMAT_R16G16B16_SINT);
1995     formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1996     formats.push_back(VK_FORMAT_R16_UINT);
1997     formats.push_back(VK_FORMAT_R16G16_UINT);
1998     formats.push_back(VK_FORMAT_R16G16B16_UINT);
1999     formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2000     formats.push_back(VK_FORMAT_R32_SINT);
2001     formats.push_back(VK_FORMAT_R32G32_SINT);
2002     formats.push_back(VK_FORMAT_R32G32B32_SINT);
2003     formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2004     formats.push_back(VK_FORMAT_R32_UINT);
2005     formats.push_back(VK_FORMAT_R32G32_UINT);
2006     formats.push_back(VK_FORMAT_R32G32B32_UINT);
2007     formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2008     formats.push_back(VK_FORMAT_R64_SINT);
2009     formats.push_back(VK_FORMAT_R64G64_SINT);
2010     formats.push_back(VK_FORMAT_R64G64B64_SINT);
2011     formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2012     formats.push_back(VK_FORMAT_R64_UINT);
2013     formats.push_back(VK_FORMAT_R64G64_UINT);
2014     formats.push_back(VK_FORMAT_R64G64B64_UINT);
2015     formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2016     formats.push_back(VK_FORMAT_R16_SFLOAT);
2017     formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2018     formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2019     formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2020     formats.push_back(VK_FORMAT_R32_SFLOAT);
2021     formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2022     formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2023     formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2024     formats.push_back(VK_FORMAT_R64_SFLOAT);
2025     formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2026     formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2027     formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2028     formats.push_back(VK_FORMAT_R8_USCALED);
2029     formats.push_back(VK_FORMAT_R8G8_USCALED);
2030     formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2031     formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2032 
2033     return formats;
2034 }
2035 
isFormatSigned(VkFormat format)2036 bool vkt::subgroups::isFormatSigned(VkFormat format)
2037 {
2038     switch (format)
2039     {
2040     default:
2041         return false;
2042     case VK_FORMAT_R8_SINT:
2043     case VK_FORMAT_R8G8_SINT:
2044     case VK_FORMAT_R8G8B8_SINT:
2045     case VK_FORMAT_R8G8B8A8_SINT:
2046     case VK_FORMAT_R16_SINT:
2047     case VK_FORMAT_R16G16_SINT:
2048     case VK_FORMAT_R16G16B16_SINT:
2049     case VK_FORMAT_R16G16B16A16_SINT:
2050     case VK_FORMAT_R32_SINT:
2051     case VK_FORMAT_R32G32_SINT:
2052     case VK_FORMAT_R32G32B32_SINT:
2053     case VK_FORMAT_R32G32B32A32_SINT:
2054     case VK_FORMAT_R64_SINT:
2055     case VK_FORMAT_R64G64_SINT:
2056     case VK_FORMAT_R64G64B64_SINT:
2057     case VK_FORMAT_R64G64B64A64_SINT:
2058         return true;
2059     }
2060 }
2061 
isFormatUnsigned(VkFormat format)2062 bool vkt::subgroups::isFormatUnsigned(VkFormat format)
2063 {
2064     switch (format)
2065     {
2066     default:
2067         return false;
2068     case VK_FORMAT_R8_UINT:
2069     case VK_FORMAT_R8G8_UINT:
2070     case VK_FORMAT_R8G8B8_UINT:
2071     case VK_FORMAT_R8G8B8A8_UINT:
2072     case VK_FORMAT_R16_UINT:
2073     case VK_FORMAT_R16G16_UINT:
2074     case VK_FORMAT_R16G16B16_UINT:
2075     case VK_FORMAT_R16G16B16A16_UINT:
2076     case VK_FORMAT_R32_UINT:
2077     case VK_FORMAT_R32G32_UINT:
2078     case VK_FORMAT_R32G32B32_UINT:
2079     case VK_FORMAT_R32G32B32A32_UINT:
2080     case VK_FORMAT_R64_UINT:
2081     case VK_FORMAT_R64G64_UINT:
2082     case VK_FORMAT_R64G64B64_UINT:
2083     case VK_FORMAT_R64G64B64A64_UINT:
2084         return true;
2085     }
2086 }
2087 
isFormatFloat(VkFormat format)2088 bool vkt::subgroups::isFormatFloat(VkFormat format)
2089 {
2090     switch (format)
2091     {
2092     default:
2093         return false;
2094     case VK_FORMAT_R16_SFLOAT:
2095     case VK_FORMAT_R16G16_SFLOAT:
2096     case VK_FORMAT_R16G16B16_SFLOAT:
2097     case VK_FORMAT_R16G16B16A16_SFLOAT:
2098     case VK_FORMAT_R32_SFLOAT:
2099     case VK_FORMAT_R32G32_SFLOAT:
2100     case VK_FORMAT_R32G32B32_SFLOAT:
2101     case VK_FORMAT_R32G32B32A32_SFLOAT:
2102     case VK_FORMAT_R64_SFLOAT:
2103     case VK_FORMAT_R64G64_SFLOAT:
2104     case VK_FORMAT_R64G64B64_SFLOAT:
2105     case VK_FORMAT_R64G64B64A64_SFLOAT:
2106         return true;
2107     }
2108 }
2109 
isFormatBool(VkFormat format)2110 bool vkt::subgroups::isFormatBool(VkFormat format)
2111 {
2112     switch (format)
2113     {
2114     default:
2115         return false;
2116     case VK_FORMAT_R8_USCALED:
2117     case VK_FORMAT_R8G8_USCALED:
2118     case VK_FORMAT_R8G8B8_USCALED:
2119     case VK_FORMAT_R8G8B8A8_USCALED:
2120         return true;
2121     }
2122 }
2123 
isFormat8bitTy(VkFormat format)2124 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2125 {
2126     switch (format)
2127     {
2128     default:
2129         return false;
2130     case VK_FORMAT_R8_SINT:
2131     case VK_FORMAT_R8G8_SINT:
2132     case VK_FORMAT_R8G8B8_SINT:
2133     case VK_FORMAT_R8G8B8A8_SINT:
2134     case VK_FORMAT_R8_UINT:
2135     case VK_FORMAT_R8G8_UINT:
2136     case VK_FORMAT_R8G8B8_UINT:
2137     case VK_FORMAT_R8G8B8A8_UINT:
2138         return true;
2139     }
2140 }
2141 
isFormat16BitTy(VkFormat format)2142 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2143 {
2144     switch (format)
2145     {
2146     default:
2147         return false;
2148     case VK_FORMAT_R16_SFLOAT:
2149     case VK_FORMAT_R16G16_SFLOAT:
2150     case VK_FORMAT_R16G16B16_SFLOAT:
2151     case VK_FORMAT_R16G16B16A16_SFLOAT:
2152     case VK_FORMAT_R16_SINT:
2153     case VK_FORMAT_R16G16_SINT:
2154     case VK_FORMAT_R16G16B16_SINT:
2155     case VK_FORMAT_R16G16B16A16_SINT:
2156     case VK_FORMAT_R16_UINT:
2157     case VK_FORMAT_R16G16_UINT:
2158     case VK_FORMAT_R16G16B16_UINT:
2159     case VK_FORMAT_R16G16B16A16_UINT:
2160         return true;
2161     }
2162 }
2163 
setVertexShaderFrameBuffer(SourceCollections & programCollection)2164 void vkt::subgroups::setVertexShaderFrameBuffer(SourceCollections &programCollection)
2165 {
2166     /*
2167         "layout(location = 0) in highp vec4 in_position;\n"
2168         "void main (void)\n"
2169         "{\n"
2170         "  gl_Position = in_position;\n"
2171         "  gl_PointSize = 1.0f;\n"
2172         "}\n";
2173     */
2174     programCollection.spirvAsmSources.add("vert") << "; SPIR-V\n"
2175                                                      "; Version: 1.3\n"
2176                                                      "; Generator: Khronos Glslang Reference Front End; 7\n"
2177                                                      "; Bound: 25\n"
2178                                                      "; Schema: 0\n"
2179                                                      "OpCapability Shader\n"
2180                                                      "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2181                                                      "OpMemoryModel Logical GLSL450\n"
2182                                                      "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2183                                                      "OpMemberDecorate %11 0 BuiltIn Position\n"
2184                                                      "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2185                                                      "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2186                                                      "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2187                                                      "OpDecorate %11 Block\n"
2188                                                      "OpDecorate %17 Location 0\n"
2189                                                      "%2 = OpTypeVoid\n"
2190                                                      "%3 = OpTypeFunction %2\n"
2191                                                      "%6 = OpTypeFloat 32\n"
2192                                                      "%7 = OpTypeVector %6 4\n"
2193                                                      "%8 = OpTypeInt 32 0\n"
2194                                                      "%9 = OpConstant %8 1\n"
2195                                                      "%10 = OpTypeArray %6 %9\n"
2196                                                      "%11 = OpTypeStruct %7 %6 %10 %10\n"
2197                                                      "%12 = OpTypePointer Output %11\n"
2198                                                      "%13 = OpVariable %12 Output\n"
2199                                                      "%14 = OpTypeInt 32 1\n"
2200                                                      "%15 = OpConstant %14 0\n"
2201                                                      "%16 = OpTypePointer Input %7\n"
2202                                                      "%17 = OpVariable %16 Input\n"
2203                                                      "%19 = OpTypePointer Output %7\n"
2204                                                      "%21 = OpConstant %14 1\n"
2205                                                      "%22 = OpConstant %6 1\n"
2206                                                      "%23 = OpTypePointer Output %6\n"
2207                                                      "%4 = OpFunction %2 None %3\n"
2208                                                      "%5 = OpLabel\n"
2209                                                      "%18 = OpLoad %7 %17\n"
2210                                                      "%20 = OpAccessChain %19 %13 %15\n"
2211                                                      "OpStore %20 %18\n"
2212                                                      "%24 = OpAccessChain %23 %13 %21\n"
2213                                                      "OpStore %24 %22\n"
2214                                                      "OpReturn\n"
2215                                                      "OpFunctionEnd\n";
2216 }
2217 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2218 void vkt::subgroups::setFragmentShaderFrameBuffer(vk::SourceCollections &programCollection)
2219 {
2220     /*
2221         "layout(location = 0) in float in_color;\n"
2222         "layout(location = 0) out uint out_color;\n"
2223         "void main()\n"
2224         {\n"
2225         "    out_color = uint(in_color);\n"
2226         "}\n";
2227     */
2228     programCollection.spirvAsmSources.add("fragment") << "; SPIR-V\n"
2229                                                          "; Version: 1.3\n"
2230                                                          "; Generator: Khronos Glslang Reference Front End; 2\n"
2231                                                          "; Bound: 14\n"
2232                                                          "; Schema: 0\n"
2233                                                          "OpCapability Shader\n"
2234                                                          "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2235                                                          "OpMemoryModel Logical GLSL450\n"
2236                                                          "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2237                                                          "OpExecutionMode %4 OriginUpperLeft\n"
2238                                                          "OpDecorate %8 Location 0\n"
2239                                                          "OpDecorate %11 Location 0\n"
2240                                                          "%2 = OpTypeVoid\n"
2241                                                          "%3 = OpTypeFunction %2\n"
2242                                                          "%6 = OpTypeInt 32 0\n"
2243                                                          "%7 = OpTypePointer Output %6\n"
2244                                                          "%8 = OpVariable %7 Output\n"
2245                                                          "%9 = OpTypeFloat 32\n"
2246                                                          "%10 = OpTypePointer Input %9\n"
2247                                                          "%11 = OpVariable %10 Input\n"
2248                                                          "%4 = OpFunction %2 None %3\n"
2249                                                          "%5 = OpLabel\n"
2250                                                          "%12 = OpLoad %9 %11\n"
2251                                                          "%13 = OpConvertFToU %6 %12\n"
2252                                                          "OpStore %8 %13\n"
2253                                                          "OpReturn\n"
2254                                                          "OpFunctionEnd\n";
2255 }
2256 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2257 void vkt::subgroups::setTesCtrlShaderFrameBuffer(vk::SourceCollections &programCollection)
2258 {
2259     /*
2260         "#extension GL_KHR_shader_subgroup_basic: enable\n"
2261         "#extension GL_EXT_tessellation_shader : require\n"
2262         "layout(vertices = 2) out;\n"
2263         "void main (void)\n"
2264         "{\n"
2265         "  if (gl_InvocationID == 0)\n"
2266         "  {\n"
2267         "    gl_TessLevelOuter[0] = 1.0f;\n"
2268         "    gl_TessLevelOuter[1] = 1.0f;\n"
2269         "  }\n"
2270         "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2271         "}\n";
2272     */
2273     programCollection.spirvAsmSources.add("tesc") << "; SPIR-V\n"
2274                                                      "; Version: 1.3\n"
2275                                                      "; Generator: Khronos Glslang Reference Front End; 2\n"
2276                                                      "; Bound: 46\n"
2277                                                      "; Schema: 0\n"
2278                                                      "OpCapability Tessellation\n"
2279                                                      "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2280                                                      "OpMemoryModel Logical GLSL450\n"
2281                                                      "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2282                                                      "OpExecutionMode %4 OutputVertices 2\n"
2283                                                      "OpDecorate %8 BuiltIn InvocationId\n"
2284                                                      "OpDecorate %20 Patch\n"
2285                                                      "OpDecorate %20 BuiltIn TessLevelOuter\n"
2286                                                      "OpMemberDecorate %29 0 BuiltIn Position\n"
2287                                                      "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2288                                                      "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2289                                                      "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2290                                                      "OpDecorate %29 Block\n"
2291                                                      "OpMemberDecorate %35 0 BuiltIn Position\n"
2292                                                      "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2293                                                      "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2294                                                      "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2295                                                      "OpDecorate %35 Block\n"
2296                                                      "%2 = OpTypeVoid\n"
2297                                                      "%3 = OpTypeFunction %2\n"
2298                                                      "%6 = OpTypeInt 32 1\n"
2299                                                      "%7 = OpTypePointer Input %6\n"
2300                                                      "%8 = OpVariable %7 Input\n"
2301                                                      "%10 = OpConstant %6 0\n"
2302                                                      "%11 = OpTypeBool\n"
2303                                                      "%15 = OpTypeFloat 32\n"
2304                                                      "%16 = OpTypeInt 32 0\n"
2305                                                      "%17 = OpConstant %16 4\n"
2306                                                      "%18 = OpTypeArray %15 %17\n"
2307                                                      "%19 = OpTypePointer Output %18\n"
2308                                                      "%20 = OpVariable %19 Output\n"
2309                                                      "%21 = OpConstant %15 1\n"
2310                                                      "%22 = OpTypePointer Output %15\n"
2311                                                      "%24 = OpConstant %6 1\n"
2312                                                      "%26 = OpTypeVector %15 4\n"
2313                                                      "%27 = OpConstant %16 1\n"
2314                                                      "%28 = OpTypeArray %15 %27\n"
2315                                                      "%29 = OpTypeStruct %26 %15 %28 %28\n"
2316                                                      "%30 = OpConstant %16 2\n"
2317                                                      "%31 = OpTypeArray %29 %30\n"
2318                                                      "%32 = OpTypePointer Output %31\n"
2319                                                      "%33 = OpVariable %32 Output\n"
2320                                                      "%35 = OpTypeStruct %26 %15 %28 %28\n"
2321                                                      "%36 = OpConstant %16 32\n"
2322                                                      "%37 = OpTypeArray %35 %36\n"
2323                                                      "%38 = OpTypePointer Input %37\n"
2324                                                      "%39 = OpVariable %38 Input\n"
2325                                                      "%41 = OpTypePointer Input %26\n"
2326                                                      "%44 = OpTypePointer Output %26\n"
2327                                                      "%4 = OpFunction %2 None %3\n"
2328                                                      "%5 = OpLabel\n"
2329                                                      "%9 = OpLoad %6 %8\n"
2330                                                      "%12 = OpIEqual %11 %9 %10\n"
2331                                                      "OpSelectionMerge %14 None\n"
2332                                                      "OpBranchConditional %12 %13 %14\n"
2333                                                      "%13 = OpLabel\n"
2334                                                      "%23 = OpAccessChain %22 %20 %10\n"
2335                                                      "OpStore %23 %21\n"
2336                                                      "%25 = OpAccessChain %22 %20 %24\n"
2337                                                      "OpStore %25 %21\n"
2338                                                      "OpBranch %14\n"
2339                                                      "%14 = OpLabel\n"
2340                                                      "%34 = OpLoad %6 %8\n"
2341                                                      "%40 = OpLoad %6 %8\n"
2342                                                      "%42 = OpAccessChain %41 %39 %40 %10\n"
2343                                                      "%43 = OpLoad %26 %42\n"
2344                                                      "%45 = OpAccessChain %44 %33 %34 %10\n"
2345                                                      "OpStore %45 %43\n"
2346                                                      "OpReturn\n"
2347                                                      "OpFunctionEnd\n";
2348 }
2349 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2350 void vkt::subgroups::setTesEvalShaderFrameBuffer(vk::SourceCollections &programCollection)
2351 {
2352     /*
2353         "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2354         "#extension GL_EXT_tessellation_shader : require\n"
2355         "layout(isolines, equal_spacing, ccw ) in;\n"
2356         "layout(location = 0) in float in_color[];\n"
2357         "layout(location = 0) out float out_color;\n"
2358         "\n"
2359         "void main (void)\n"
2360         "{\n"
2361         "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2362         "  out_color = in_color[0];\n"
2363         "}\n";
2364     */
2365     programCollection.spirvAsmSources.add("tese")
2366         << "; SPIR-V\n"
2367            "; Version: 1.3\n"
2368            "; Generator: Khronos Glslang Reference Front End; 2\n"
2369            "; Bound: 45\n"
2370            "; Schema: 0\n"
2371            "OpCapability Tessellation\n"
2372            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2373            "OpMemoryModel Logical GLSL450\n"
2374            "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2375            "OpExecutionMode %4 Isolines\n"
2376            "OpExecutionMode %4 SpacingEqual\n"
2377            "OpExecutionMode %4 VertexOrderCcw\n"
2378            "OpMemberDecorate %11 0 BuiltIn Position\n"
2379            "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2380            "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2381            "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2382            "OpDecorate %11 Block\n"
2383            "OpMemberDecorate %16 0 BuiltIn Position\n"
2384            "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2385            "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2386            "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2387            "OpDecorate %16 Block\n"
2388            "OpDecorate %29 BuiltIn TessCoord\n"
2389            "OpDecorate %39 Location 0\n"
2390            "OpDecorate %42 Location 0\n"
2391            "%2 = OpTypeVoid\n"
2392            "%3 = OpTypeFunction %2\n"
2393            "%6 = OpTypeFloat 32\n"
2394            "%7 = OpTypeVector %6 4\n"
2395            "%8 = OpTypeInt 32 0\n"
2396            "%9 = OpConstant %8 1\n"
2397            "%10 = OpTypeArray %6 %9\n"
2398            "%11 = OpTypeStruct %7 %6 %10 %10\n"
2399            "%12 = OpTypePointer Output %11\n"
2400            "%13 = OpVariable %12 Output\n"
2401            "%14 = OpTypeInt 32 1\n"
2402            "%15 = OpConstant %14 0\n"
2403            "%16 = OpTypeStruct %7 %6 %10 %10\n"
2404            "%17 = OpConstant %8 32\n"
2405            "%18 = OpTypeArray %16 %17\n"
2406            "%19 = OpTypePointer Input %18\n"
2407            "%20 = OpVariable %19 Input\n"
2408            "%21 = OpTypePointer Input %7\n"
2409            "%24 = OpConstant %14 1\n"
2410            "%27 = OpTypeVector %6 3\n"
2411            "%28 = OpTypePointer Input %27\n"
2412            "%29 = OpVariable %28 Input\n"
2413            "%30 = OpConstant %8 0\n"
2414            "%31 = OpTypePointer Input %6\n"
2415            "%36 = OpTypePointer Output %7\n"
2416            "%38 = OpTypePointer Output %6\n"
2417            "%39 = OpVariable %38 Output\n"
2418            "%40 = OpTypeArray %6 %17\n"
2419            "%41 = OpTypePointer Input %40\n"
2420            "%42 = OpVariable %41 Input\n"
2421            "%4 = OpFunction %2 None %3\n"
2422            "%5 = OpLabel\n"
2423            "%22 = OpAccessChain %21 %20 %15 %15\n"
2424            "%23 = OpLoad %7 %22\n"
2425            "%25 = OpAccessChain %21 %20 %24 %15\n"
2426            "%26 = OpLoad %7 %25\n"
2427            "%32 = OpAccessChain %31 %29 %30\n"
2428            "%33 = OpLoad %6 %32\n"
2429            "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2430            "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2431            "%37 = OpAccessChain %36 %13 %15\n"
2432            "OpStore %37 %35\n"
2433            "%43 = OpAccessChain %31 %42 %15\n"
2434            "%44 = OpLoad %6 %43\n"
2435            "OpStore %39 %44\n"
2436            "OpReturn\n"
2437            "OpFunctionEnd\n";
2438 }
2439 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2440 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &glslTemplate,
2441                                                     const vk::ShaderBuildOptions &options,
2442                                                     vk::GlslSourceCollection &collection)
2443 {
2444     tcu::StringTemplate geometryTemplate(glslTemplate);
2445 
2446     map<string, string> linesParams;
2447     linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2448 
2449     map<string, string> pointsParams;
2450     pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2451 
2452     collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2453     collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2454 }
2455 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2456 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &spirvTemplate,
2457                                                     const vk::SpirVAsmBuildOptions &options,
2458                                                     vk::SpirVAsmCollection &collection)
2459 {
2460     tcu::StringTemplate geometryTemplate(spirvTemplate);
2461 
2462     map<string, string> linesParams;
2463     linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2464 
2465     map<string, string> pointsParams;
2466     pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2467 
2468     collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2469     collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2470 }
2471 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2472 void initializeMemory(Context &context, const Allocation &alloc, const subgroups::SSBOData &data)
2473 {
2474     const vk::VkFormat format = data.format;
2475     const vk::VkDeviceSize size =
2476         data.numElements * (data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2477     if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2478     {
2479         de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2480 
2481         switch (format)
2482         {
2483         default:
2484             DE_FATAL("Illegal buffer format");
2485             break;
2486         case VK_FORMAT_R8_SINT:
2487         case VK_FORMAT_R8G8_SINT:
2488         case VK_FORMAT_R8G8B8_SINT:
2489         case VK_FORMAT_R8G8B8A8_SINT:
2490         case VK_FORMAT_R8_UINT:
2491         case VK_FORMAT_R8G8_UINT:
2492         case VK_FORMAT_R8G8B8_UINT:
2493         case VK_FORMAT_R8G8B8A8_UINT:
2494         {
2495             uint8_t *ptr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());
2496 
2497             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint8_t)); k++)
2498             {
2499                 ptr[k] = rnd.getUint8();
2500             }
2501         }
2502         break;
2503         case VK_FORMAT_R16_SINT:
2504         case VK_FORMAT_R16G16_SINT:
2505         case VK_FORMAT_R16G16B16_SINT:
2506         case VK_FORMAT_R16G16B16A16_SINT:
2507         case VK_FORMAT_R16_UINT:
2508         case VK_FORMAT_R16G16_UINT:
2509         case VK_FORMAT_R16G16B16_UINT:
2510         case VK_FORMAT_R16G16B16A16_UINT:
2511         {
2512             uint16_t *ptr = reinterpret_cast<uint16_t *>(alloc.getHostPtr());
2513 
2514             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint16_t)); k++)
2515             {
2516                 ptr[k] = rnd.getUint16();
2517             }
2518         }
2519         break;
2520         case VK_FORMAT_R8_USCALED:
2521         case VK_FORMAT_R8G8_USCALED:
2522         case VK_FORMAT_R8G8B8_USCALED:
2523         case VK_FORMAT_R8G8B8A8_USCALED:
2524         {
2525             uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2526 
2527             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
2528             {
2529                 uint32_t r = rnd.getUint32();
2530                 ptr[k]     = (r & 1) ? r : 0;
2531             }
2532         }
2533         break;
2534         case VK_FORMAT_R32_SINT:
2535         case VK_FORMAT_R32G32_SINT:
2536         case VK_FORMAT_R32G32B32_SINT:
2537         case VK_FORMAT_R32G32B32A32_SINT:
2538         case VK_FORMAT_R32_UINT:
2539         case VK_FORMAT_R32G32_UINT:
2540         case VK_FORMAT_R32G32B32_UINT:
2541         case VK_FORMAT_R32G32B32A32_UINT:
2542         {
2543             uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2544 
2545             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
2546             {
2547                 ptr[k] = rnd.getUint32();
2548             }
2549         }
2550         break;
2551         case VK_FORMAT_R64_SINT:
2552         case VK_FORMAT_R64G64_SINT:
2553         case VK_FORMAT_R64G64B64_SINT:
2554         case VK_FORMAT_R64G64B64A64_SINT:
2555         case VK_FORMAT_R64_UINT:
2556         case VK_FORMAT_R64G64_UINT:
2557         case VK_FORMAT_R64G64B64_UINT:
2558         case VK_FORMAT_R64G64B64A64_UINT:
2559         {
2560             uint64_t *ptr = reinterpret_cast<uint64_t *>(alloc.getHostPtr());
2561 
2562             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint64_t)); k++)
2563             {
2564                 ptr[k] = rnd.getUint64();
2565             }
2566         }
2567         break;
2568         case VK_FORMAT_R16_SFLOAT:
2569         case VK_FORMAT_R16G16_SFLOAT:
2570         case VK_FORMAT_R16G16B16_SFLOAT:
2571         case VK_FORMAT_R16G16B16A16_SFLOAT:
2572         {
2573             float16_t *const ptr = reinterpret_cast<float16_t *>(alloc.getHostPtr());
2574 
2575             for (vk::VkDeviceSize k = 0; k < (size / sizeof(float16_t)); k++)
2576             {
2577                 ptr[k] = tcu::Float16(rnd.getFloat()).bits();
2578             }
2579         }
2580         break;
2581         case VK_FORMAT_R32_SFLOAT:
2582         case VK_FORMAT_R32G32_SFLOAT:
2583         case VK_FORMAT_R32G32B32_SFLOAT:
2584         case VK_FORMAT_R32G32B32A32_SFLOAT:
2585         {
2586             float *ptr = reinterpret_cast<float *>(alloc.getHostPtr());
2587 
2588             for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2589             {
2590                 ptr[k] = rnd.getFloat();
2591             }
2592         }
2593         break;
2594         case VK_FORMAT_R64_SFLOAT:
2595         case VK_FORMAT_R64G64_SFLOAT:
2596         case VK_FORMAT_R64G64B64_SFLOAT:
2597         case VK_FORMAT_R64G64B64A64_SFLOAT:
2598         {
2599             double *ptr = reinterpret_cast<double *>(alloc.getHostPtr());
2600 
2601             for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2602             {
2603                 ptr[k] = rnd.getDouble();
2604             }
2605         }
2606         break;
2607         }
2608     }
2609     else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2610     {
2611         uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2612 
2613         for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2614         {
2615             ptr[k] = 0;
2616         }
2617     }
2618 
2619     if (subgroups::SSBOData::InitializeNone != data.initializeType)
2620     {
2621         flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2622     }
2623 }
2624 
getResultBinding(const VkShaderStageFlagBits shaderStage)2625 uint32_t getResultBinding(const VkShaderStageFlagBits shaderStage)
2626 {
2627     switch (shaderStage)
2628     {
2629     case VK_SHADER_STAGE_VERTEX_BIT:
2630         return 0u;
2631     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2632         return 1u;
2633     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2634         return 2u;
2635     case VK_SHADER_STAGE_GEOMETRY_BIT:
2636         return 3u;
2637     default:
2638         DE_ASSERT(0);
2639         return -1;
2640     }
2641     DE_ASSERT(0);
2642     return -1;
2643 }
2644 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2645 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2646     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2647     subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage)
2648 {
2649     return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2650         context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2651 }
2652 
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage,const uint32_t tessShaderStageCreateFlags,const uint32_t requiredSubgroupSize)2653 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2654     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2655     subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage, const uint32_t tessShaderStageCreateFlags,
2656     const uint32_t requiredSubgroupSize)
2657 {
2658     const DeviceInterface &vk = context.getDeviceInterface();
2659     const VkDevice device     = context.getDevice();
2660     const uint32_t maxWidth   = getMaxWidth();
2661     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
2662     DescriptorSetLayoutBuilder layoutBuilder;
2663     DescriptorPoolBuilder poolBuilder;
2664     DescriptorSetUpdateBuilder updateBuilder;
2665     Move<VkDescriptorPool> descriptorPool;
2666     Move<VkDescriptorSet> descriptorSet;
2667     const Unique<VkShaderModule> vertexShaderModule(
2668         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2669     const Unique<VkShaderModule> teCtrlShaderModule(
2670         createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2671     const Unique<VkShaderModule> teEvalShaderModule(
2672         createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2673     const Unique<VkShaderModule> fragmentShaderModule(
2674         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2675     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2676     const VkVertexInputBindingDescription vertexInputBinding = {
2677         0u,                                       //  uint32_t binding;
2678         static_cast<uint32_t>(sizeof(tcu::Vec4)), //  uint32_t stride;
2679         VK_VERTEX_INPUT_RATE_VERTEX               //  VkVertexInputRate inputRate;
2680     };
2681     const VkVertexInputAttributeDescription vertexInputAttribute = {
2682         0u,                            //  uint32_t location;
2683         0u,                            //  uint32_t binding;
2684         VK_FORMAT_R32G32B32A32_SFLOAT, //  VkFormat format;
2685         0u                             //  uint32_t offset;
2686     };
2687 
2688     for (uint32_t i = 0u; i < extraDataCount; i++)
2689     {
2690         if (extraData[i].isImage())
2691         {
2692             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
2693                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
2694         }
2695         else
2696         {
2697             DE_ASSERT(extraData[i].isUBO());
2698             vk::VkDeviceSize size =
2699                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2700             inputBuffers[i] =
2701                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2702         }
2703         const Allocation &alloc = inputBuffers[i]->getAllocation();
2704         initializeMemory(context, alloc, extraData[i]);
2705     }
2706 
2707     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2708         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2709 
2710     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2711 
2712     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2713 
2714     const uint32_t requiredSubgroupSizes[5] = {
2715         0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2716         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u), 0u, 0u};
2717 
2718     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
2719         context, *pipelineLayout,
2720         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
2721             VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2722         *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
2723         VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u,
2724         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2725         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u), 0u, 0u,
2726         requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2727 
2728     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2729         poolBuilder.addType(inputBuffers[ndx]->getType());
2730 
2731     if (extraDataCount > 0)
2732     {
2733         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2734         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2735     }
2736 
2737     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2738     {
2739         if (inputBuffers[buffersNdx]->isImage())
2740         {
2741             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2742                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
2743                                                                  VK_IMAGE_LAYOUT_GENERAL);
2744 
2745             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2746                                       inputBuffers[buffersNdx]->getType(), &info);
2747         }
2748         else
2749         {
2750             VkDescriptorBufferInfo info =
2751                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
2752                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2753 
2754             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2755                                       inputBuffers[buffersNdx]->getType(), &info);
2756         }
2757     }
2758 
2759     updateBuilder.update(vk, device);
2760 
2761     const VkQueue queue             = context.getUniversalQueue();
2762     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2763     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2764     const uint32_t subgroupSize = getSubgroupSize(context);
2765     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
2766     const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2767     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2768     unsigned totalIterations  = 0u;
2769     unsigned failedIterations = 0u;
2770     Image discardableImage(context, maxWidth, 1u, format,
2771                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2772 
2773     {
2774         const Allocation &alloc = vertexBuffer.getAllocation();
2775         std::vector<tcu::Vec4> data(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2776         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
2777         float leftHandPosition = -1.0f;
2778 
2779         for (uint32_t ndx = 0u; ndx < data.size(); ndx += 2u)
2780         {
2781             data[ndx][0] = leftHandPosition;
2782             leftHandPosition += pixelSize;
2783             data[ndx + 1][0] = leftHandPosition;
2784         }
2785 
2786         deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2787         flushAlloc(vk, device, alloc);
2788     }
2789 
2790     const Unique<VkFramebuffer> framebuffer(
2791         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2792     const VkViewport viewport              = makeViewport(maxWidth, 1u);
2793     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
2794     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2795     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2796     const VkDeviceSize vertexBufferOffset = 0u;
2797 
2798     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
2799     {
2800         totalIterations++;
2801 
2802         beginCommandBuffer(vk, *cmdBuffer);
2803         {
2804 
2805             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2806             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2807 
2808             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2809 
2810             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2811 
2812             if (extraDataCount > 0)
2813             {
2814                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2815                                          &descriptorSet.get(), 0u, DE_NULL);
2816             }
2817 
2818             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2819             vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2820 
2821             endRenderPass(vk, *cmdBuffer);
2822 
2823             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
2824                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2825                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2826             endCommandBuffer(vk, *cmdBuffer);
2827 
2828             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2829         }
2830         context.resetCommandPoolForVKSC(device, *cmdPool);
2831 
2832         {
2833             const Allocation &allocResult = imageBufferResult.getAllocation();
2834             invalidateAlloc(vk, device, allocResult);
2835 
2836             std::vector<const void *> datas;
2837             datas.push_back(allocResult.getHostPtr());
2838             if (!checkResult(internalData, datas, width / 2u, subgroupSize))
2839                 failedIterations++;
2840         }
2841     }
2842 
2843     if (0 < failedIterations)
2844     {
2845         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2846 
2847         context.getTestContext().getLog()
2848             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
2849         return tcu::TestStatus::fail("Failed!");
2850     }
2851 
2852     return tcu::TestStatus::pass("OK");
2853 }
2854 
check(std::vector<const void * > datas,uint32_t width,uint32_t ref)2855 bool vkt::subgroups::check(std::vector<const void *> datas, uint32_t width, uint32_t ref)
2856 {
2857     const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
2858 
2859     for (uint32_t n = 0; n < width; ++n)
2860     {
2861         if (data[n] != ref)
2862         {
2863             return false;
2864         }
2865     }
2866 
2867     return true;
2868 }
2869 
checkComputeOrMesh(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t ref)2870 bool vkt::subgroups::checkComputeOrMesh(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
2871                                         const uint32_t localSize[3], uint32_t ref)
2872 {
2873     const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
2874     const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
2875     const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
2876 
2877     return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2878 }
2879 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2880 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(Context &context, VkFormat format,
2881                                                             const SSBOData *extraData, uint32_t extraDataCount,
2882                                                             const void *internalData,
2883                                                             subgroups::CheckResult checkResult)
2884 {
2885     return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
2886                                                            checkResult, 0u, 0u);
2887 }
2888 
// Runs a framebuffer-based subgroup test that exercises the geometry stage.
//
// A vertex + geometry + fragment pipeline (point-list topology) is built from
// the "vert", "geometry" and "fragment" binaries. The extra inputs are bound
// as descriptors visible to the geometry stage only. For every draw count
// produced by getNextWidth(), starting at 1 and stopping before getMaxWidth(),
// that many points are drawn into a maxWidth x 1 colour attachment, the
// attachment is copied into a buffer and the buffer is handed to checkResult.
//
// context                         test context supplying device, queue and shader binaries
// format                          format of the colour attachment that is read back
// extraData / extraDataCount      extra inputs; each entry must be an image or a UBO
// internalData                    opaque pointer forwarded unchanged to checkResult
// checkResult                     callback invoked once per draw count with the read-back data
// geometryShaderStageCreateFlags  forwarded to makeGraphicsPipeline
// requiredSubgroupSize            0 means "none"; otherwise forwarded to makeGraphicsPipeline
//
// Returns pass("OK") when every iteration passes; otherwise logs how many
// iterations passed and returns fail("Failed!").
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const uint32_t geometryShaderStageCreateFlags,const uint32_t requiredSubgroupSize)2889 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
2890     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2891     subgroups::CheckResult checkResult, const uint32_t geometryShaderStageCreateFlags,
2892     const uint32_t requiredSubgroupSize)
2893 {
2894     const DeviceInterface &vk = context.getDeviceInterface();
2895     const VkDevice device     = context.getDevice();
2896     const uint32_t maxWidth   = getMaxWidth();
2897     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
2898     DescriptorSetLayoutBuilder layoutBuilder;
2899     DescriptorPoolBuilder poolBuilder;
2900     DescriptorSetUpdateBuilder updateBuilder;
2901     Move<VkDescriptorPool> descriptorPool;
2902     Move<VkDescriptorSet> descriptorSet;
         // Shader binaries are looked up by name in the test's binary collection.
2903     const Unique<VkShaderModule> vertexShaderModule(
2904         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2905     const Unique<VkShaderModule> geometryShaderModule(
2906         createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2907     const Unique<VkShaderModule> fragmentShaderModule(
2908         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2909     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
         // A single tightly packed vec4 attribute per vertex: binding 0, location 0.
2910     const VkVertexInputBindingDescription vertexInputBinding = {
2911         0u,                                       //  uint32_t binding;
2912         static_cast<uint32_t>(sizeof(tcu::Vec4)), //  uint32_t stride;
2913         VK_VERTEX_INPUT_RATE_VERTEX               //  VkVertexInputRate inputRate;
2914     };
2915     const VkVertexInputAttributeDescription vertexInputAttribute = {
2916         0u,                            //  uint32_t location;
2917         0u,                            //  uint32_t binding;
2918         VK_FORMAT_R32G32B32A32_SFLOAT, //  VkFormat format;
2919         0u                             //  uint32_t offset;
2920     };
2921 
         // Create and fill one backing resource per extra input: an image when the
         // entry says so, otherwise a uniform buffer (anything else trips the assert).
2922     for (uint32_t i = 0u; i < extraDataCount; i++)
2923     {
2924         if (extraData[i].isImage())
2925         {
2926             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
2927                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
2928         }
2929         else
2930         {
2931             DE_ASSERT(extraData[i].isUBO());
2932             vk::VkDeviceSize size =
2933                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2934             inputBuffers[i] =
2935                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2936         }
2937         const Allocation &alloc = inputBuffers[i]->getAllocation();
2938         initializeMemory(context, alloc, extraData[i]);
2939     }
2940 
         // Each extra input is added to the layout for the geometry stage only.
2941     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2942         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2943 
2944     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2945 
2946     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2947 
         // Only entry 3 carries the requested size; the table is passed to the pipeline
         // only when a non-zero size was requested.
         // NOTE(review): presumably slot 3 is the geometry stage in makeGraphicsPipeline's
         // per-stage array - confirm against its definition.
2948     const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2949 
2950     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
2951         context, *pipelineLayout,
2952         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT, *vertexShaderModule,
2953         *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2954         &vertexInputBinding, &vertexInputAttribute, true, format, 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2955         requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2956 
2957     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2958         poolBuilder.addType(inputBuffers[ndx]->getType());
2959 
         // A pool and a set are created only when there is something to bind. With no
         // extra inputs descriptorSet stays empty; the write loop below then runs zero
         // times and the set is never bound.
2960     if (extraDataCount > 0)
2961     {
2962         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2963         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2964     }
2965 
         // Binding numbers follow the order of the extra inputs (binding == index).
2966     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2967     {
2968         if (inputBuffers[buffersNdx]->isImage())
2969         {
2970             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2971                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
2972                                                                  VK_IMAGE_LAYOUT_GENERAL);
2973 
2974             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2975                                       inputBuffers[buffersNdx]->getType(), &info);
2976         }
2977         else
2978         {
2979             VkDescriptorBufferInfo info =
2980                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
2981                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2982 
2983             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2984                                       inputBuffers[buffersNdx]->getType(), &info);
2985         }
2986     }
2987 
2988     updateBuilder.update(vk, device);
2989 
2990     const VkQueue queue             = context.getUniversalQueue();
2991     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2992     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2993     const uint32_t subgroupSize = getSubgroupSize(context);
2994     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
2995     const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2996     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2997     unsigned totalIterations  = 0u;
2998     unsigned failedIterations = 0u;
2999     Image discardableImage(context, maxWidth, 1u, format,
3000                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3001 
         // Fill the vertex buffer with maxWidth points. x starts at -1 and advances by
         // 2 / maxWidth per point, offset by half a step, i.e. the centre of each of the
         // maxWidth columns of the [-1, 1] range. y, z and w keep their initial 1.0.
3002     {
3003         const Allocation &alloc = vertexBuffer.getAllocation();
3004         std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3005         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
3006         float leftHandPosition = -1.0f;
3007 
3008         for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
3009         {
3010             data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3011             leftHandPosition += pixelSize;
3012         }
3013 
3014         deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3015         flushAlloc(vk, device, alloc);
3016     }
3017 
3018     const Unique<VkFramebuffer> framebuffer(
3019         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3020     const VkViewport viewport              = makeViewport(maxWidth, 1u);
3021     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3022     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3023     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3024     const VkDeviceSize vertexBufferOffset = 0u;
3025 
         // One iteration per draw count: 1, then whatever getNextWidth() yields, stopping
         // before maxWidth is reached (maxWidth itself is never drawn).
3026     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3027     {
3028         totalIterations++;
3029 
             // Re-run initializeMemory on every extra input before each draw.
3030         for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3031         {
3032             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3033             initializeMemory(context, alloc, extraData[ndx]);
3034         }
3035 
3036         beginCommandBuffer(vk, *cmdBuffer);
3037         {
3038             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3039 
3040             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3041 
3042             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3043 
3044             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3045 
3046             if (extraDataCount > 0)
3047             {
3048                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3049                                          &descriptorSet.get(), 0u, DE_NULL);
3050             }
3051 
3052             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3053 
                 // Draw the first `width` points of the vertex buffer, one instance.
3054             vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3055 
3056             endRenderPass(vk, *cmdBuffer);
3057 
                 // The whole maxWidth x 1 attachment is read back, not just the `width`
                 // texels that were drawn.
3058             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
3059                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3060                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3061 
3062             endCommandBuffer(vk, *cmdBuffer);
3063 
3064             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3065         }
             // NOTE(review): going by its name this reset matters for Vulkan SC builds -
             // confirm what it does elsewhere.
3066         context.resetCommandPoolForVKSC(device, *cmdPool);
3067 
             // The callback sees only the image read-back (datas[0]), the number of points
             // drawn and the subgroup size; the extra inputs are not passed to it here.
3068         {
3069             const Allocation &allocResult = imageBufferResult.getAllocation();
3070             invalidateAlloc(vk, device, allocResult);
3071 
3072             std::vector<const void *> datas;
3073             datas.push_back(allocResult.getHostPtr());
3074             if (!checkResult(internalData, datas, width, subgroupSize))
3075                 failedIterations++;
3076         }
3077     }
3078 
3079     if (0 < failedIterations)
3080     {
             // failedIterations grows by at most one per iteration, so the clamp to zero
             // is purely defensive.
3081         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3082 
3083         context.getTestContext().getLog()
3084             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3085 
3086         return tcu::TestStatus::fail("Failed!");
3087     }
3088 
3089     return tcu::TestStatus::pass("OK");
3090 }
3091 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3092 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages(Context &context,
3093                                                                          const vk::VkShaderStageFlags testedStages)
3094 {
3095     const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
3096     VkShaderStageFlags stages                                    = testedStages & subgroupProperties.supportedStages;
3097 
3098     DE_ASSERT(isAllGraphicsStages(testedStages));
3099 
3100     if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3101     {
3102         if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3103             TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3104         else
3105             stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3106     }
3107 
3108     if (static_cast<VkShaderStageFlags>(0u) == stages)
3109         TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3110 
3111     return stages;
3112 }
3113 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3114 tcu::TestStatus vkt::subgroups::allStages(Context &context, vk::VkFormat format, const SSBOData *extraData,
3115                                           uint32_t extraDataCount, const void *internalData,
3116                                           const VerificationFunctor &checkResult,
3117                                           const vk::VkShaderStageFlags shaderStage)
3118 {
3119     return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3120                                                          checkResult, shaderStage, 0u, 0u, 0u, 0u, 0u, DE_NULL);
3121 }
3122 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])3123 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
3124     Context &context, vk::VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount,
3125     const void *internalData, const VerificationFunctor &checkResult, const vk::VkShaderStageFlags shaderStageTested,
3126     const uint32_t vertexShaderStageCreateFlags, const uint32_t tessellationControlShaderStageCreateFlags,
3127     const uint32_t tessellationEvalShaderStageCreateFlags, const uint32_t geometryShaderStageCreateFlags,
3128     const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize[5])
3129 {
3130     const DeviceInterface &vk = context.getDeviceInterface();
3131     const VkDevice device     = context.getDevice();
3132     const uint32_t maxWidth   = getMaxWidth();
3133     vector<VkShaderStageFlagBits> stagesVector;
3134     VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
3135 
3136     Move<VkShaderModule> vertexShaderModule;
3137     Move<VkShaderModule> teCtrlShaderModule;
3138     Move<VkShaderModule> teEvalShaderModule;
3139     Move<VkShaderModule> geometryShaderModule;
3140     Move<VkShaderModule> fragmentShaderModule;
3141 
3142     if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3143     {
3144         stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3145     }
3146     if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3147     {
3148         stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3149         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ?
3150                                    (VkShaderStageFlags)0u :
3151                                    (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3152         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
3153                                    (VkShaderStageFlags)0u :
3154                                    (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3155     }
3156     if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3157     {
3158         stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3159         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
3160                                    (VkShaderStageFlags)0u :
3161                                    (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3162         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ?
3163                                    (VkShaderStageFlags)0u :
3164                                    (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3165     }
3166     if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3167     {
3168         stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3169         const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3170         shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
3171     }
3172     if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3173     {
3174         const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3175         shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
3176     }
3177 
3178     const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
3179     const string vert          = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
3180     const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
3181     const string tese =
3182         (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
3183 
3184     shaderStageRequired = shaderStageTested | shaderStageRequired;
3185 
3186     vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3187     if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3188     {
3189         teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3190         teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3191     }
3192     if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3193     {
3194         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3195         {
3196             // tessellation shaders output line primitives
3197             geometryShaderModule =
3198                 createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3199         }
3200         else
3201         {
3202             // otherwise points are processed by geometry shader
3203             geometryShaderModule =
3204                 createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3205         }
3206     }
3207     if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3208         fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3209 
3210     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(stagesCount + extraDatasCount);
3211 
3212     DescriptorSetLayoutBuilder layoutBuilder;
3213 
3214     // The implicit result SSBO we use to store our outputs from the shader
3215     for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
3216     {
3217         const VkDeviceSize shaderSize =
3218             (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3219         const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3220         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3221 
3222         layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx],
3223                                         getResultBinding(stagesVector[ndx]), DE_NULL);
3224     }
3225 
3226     for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3227     {
3228         const uint32_t datasNdx = ndx - stagesCount;
3229         if (extraDatas[datasNdx].isImage())
3230         {
3231             inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(
3232                 context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3233         }
3234         else
3235         {
3236             const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
3237                                                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3238             const auto size  = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
3239                               extraDatas[datasNdx].numElements;
3240             inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3241         }
3242 
3243         const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3244         initializeMemory(context, alloc, extraDatas[datasNdx]);
3245 
3246         layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, extraDatas[datasNdx].stages,
3247                                         extraDatas[datasNdx].binding, DE_NULL);
3248     }
3249 
3250     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3251 
3252     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3253 
3254     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3255     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3256         context, *pipelineLayout, shaderStageRequired, *vertexShaderModule, *fragmentShaderModule,
3257         *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
3258         (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST :
3259                                                                            VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3260         DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT, vertexShaderStageCreateFlags,
3261         tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3262         geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3263 
3264     Move<VkDescriptorPool> descriptorPool;
3265     Move<VkDescriptorSet> descriptorSet;
3266 
3267     if (inputBuffers.size() > 0)
3268     {
3269         DescriptorPoolBuilder poolBuilder;
3270 
3271         for (uint32_t ndx = 0u; ndx < static_cast<uint32_t>(inputBuffers.size()); ndx++)
3272         {
3273             poolBuilder.addType(inputBuffers[ndx]->getType());
3274         }
3275 
3276         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3277 
3278         // Create descriptor set
3279         descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3280 
3281         DescriptorSetUpdateBuilder updateBuilder;
3282 
3283         for (uint32_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3284         {
3285             uint32_t binding;
3286             if (ndx < stagesCount)
3287                 binding = getResultBinding(stagesVector[ndx]);
3288             else
3289                 binding = extraDatas[ndx - stagesCount].binding;
3290 
3291             if (inputBuffers[ndx]->isImage())
3292             {
3293                 VkDescriptorImageInfo info =
3294                     makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3295                                             inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3296 
3297                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
3298                                           inputBuffers[ndx]->getType(), &info);
3299             }
3300             else
3301             {
3302                 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3303                     inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3304 
3305                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
3306                                           inputBuffers[ndx]->getType(), &info);
3307             }
3308         }
3309 
3310         updateBuilder.update(vk, device);
3311     }
3312 
3313     {
3314         const VkQueue queue             = context.getUniversalQueue();
3315         const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3316         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3317         const uint32_t subgroupSize = getSubgroupSize(context);
3318         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3319         unsigned totalIterations  = 0u;
3320         unsigned failedIterations = 0u;
3321         Image resultImage(context, maxWidth, 1, format,
3322                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3323         const Unique<VkFramebuffer> framebuffer(
3324             makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3325         const VkViewport viewport              = makeViewport(maxWidth, 1u);
3326         const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3327         const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3328         Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3329         const VkImageSubresourceRange subresourceRange = {
3330             VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags    aspectMask
3331             0u,                        //uint32_t                baseMipLevel
3332             1u,                        //uint32_t                levelCount
3333             0u,                        //uint32_t                baseArrayLayer
3334             1u                         //uint32_t                layerCount
3335         };
3336 
3337         const VkImageMemoryBarrier colorAttachmentBarrier =
3338             makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
3339                                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, resultImage.getImage(), subresourceRange);
3340 
3341         for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3342         {
3343             for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3344             {
3345                 // re-init the data
3346                 const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3347                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3348             }
3349 
3350             totalIterations++;
3351 
3352             beginCommandBuffer(vk, *cmdBuffer);
3353 
3354             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3355                                   VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u,
3356                                   (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
3357                                   &colorAttachmentBarrier);
3358 
3359             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3360 
3361             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3362 
3363             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3364 
3365             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3366 
3367             if (stagesCount + extraDatasCount > 0)
3368                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3369                                          &descriptorSet.get(), 0u, DE_NULL);
3370 
3371             vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3372 
3373             endRenderPass(vk, *cmdBuffer);
3374 
3375             copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(),
3376                               tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3377                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3378 
3379             endCommandBuffer(vk, *cmdBuffer);
3380 
3381             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3382 
3383             for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
3384             {
3385                 std::vector<const void *> datas;
3386                 if (!inputBuffers[ndx]->isImage())
3387                 {
3388                     const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
3389                     invalidateAlloc(vk, device, resultAlloc);
3390                     // we always have our result data first
3391                     datas.push_back(resultAlloc.getHostPtr());
3392                 }
3393 
3394                 for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3395                 {
3396                     const uint32_t datasNdx = index - stagesCount;
3397                     if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3398                     {
3399                         const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
3400                         invalidateAlloc(vk, device, resultAlloc);
3401                         // we always have our result data first
3402                         datas.push_back(resultAlloc.getHostPtr());
3403                     }
3404                 }
3405 
3406                 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3407                 const bool multiCall = (stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3408                                         stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3409                                         stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3410                                         stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT);
3411                 const uint32_t usedWidth =
3412                     ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3413 
3414                 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3415                     failedIterations++;
3416             }
3417             if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3418             {
3419                 std::vector<const void *> datas;
3420                 const Allocation &resultAlloc = imageBufferResult.getAllocation();
3421                 invalidateAlloc(vk, device, resultAlloc);
3422 
3423                 // we always have our result data first
3424                 datas.push_back(resultAlloc.getHostPtr());
3425 
3426                 for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3427                 {
3428                     const uint32_t datasNdx = index - stagesCount;
3429                     if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3430                     {
3431                         const Allocation &alloc = inputBuffers[index]->getAllocation();
3432                         invalidateAlloc(vk, device, alloc);
3433                         // we always have our result data first
3434                         datas.push_back(alloc.getHostPtr());
3435                     }
3436                 }
3437 
3438                 if (!checkResult(internalData, datas, width, subgroupSize, false))
3439                     failedIterations++;
3440             }
3441 
3442             context.resetCommandPoolForVKSC(device, *cmdPool);
3443         }
3444 
3445         if (0 < failedIterations)
3446         {
3447             unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3448 
3449             context.getTestContext().getLog() << TestLog::Message << valuesPassed << " / " << totalIterations
3450                                               << " values passed" << TestLog::EndMessage;
3451 
3452             return tcu::TestStatus::fail("Failed!");
3453         }
3454     }
3455 
3456     return tcu::TestStatus::pass("OK");
3457 }
3458 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3459 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context &context, vk::VkFormat format,
3460                                                           const SSBOData *extraData, uint32_t extraDataCount,
3461                                                           const void *internalData, subgroups::CheckResult checkResult)
3462 {
3463     return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3464                                                          checkResult, 0u, 0u);
3465 }
3466 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const uint32_t vertexShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3467 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(
3468     Context &context, vk::VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
3469     subgroups::CheckResult checkResult, const uint32_t vertexShaderStageCreateFlags,
3470     const uint32_t requiredSubgroupSize)
3471 {
3472     const DeviceInterface &vk       = context.getDeviceInterface();
3473     const VkDevice device           = context.getDevice();
3474     const VkQueue queue             = context.getUniversalQueue();
3475     const uint32_t maxWidth         = getMaxWidth();
3476     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3477     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
3478     DescriptorSetLayoutBuilder layoutBuilder;
3479     const Unique<VkShaderModule> vertexShaderModule(
3480         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3481     const Unique<VkShaderModule> fragmentShaderModule(
3482         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3483     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3484     const VkVertexInputBindingDescription vertexInputBinding = {
3485         0u,                                       // binding;
3486         static_cast<uint32_t>(sizeof(tcu::Vec4)), // stride;
3487         VK_VERTEX_INPUT_RATE_VERTEX               // inputRate
3488     };
3489     const VkVertexInputAttributeDescription vertexInputAttribute = {0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT, 0u};
3490 
3491     for (uint32_t i = 0u; i < extraDataCount; i++)
3492     {
3493         if (extraData[i].isImage())
3494         {
3495             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3496                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
3497         }
3498         else
3499         {
3500             DE_ASSERT(extraData[i].isUBO());
3501             vk::VkDeviceSize size =
3502                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3503             inputBuffers[i] =
3504                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3505         }
3506         const Allocation &alloc = inputBuffers[i]->getAllocation();
3507         initializeMemory(context, alloc, extraData[i]);
3508     }
3509 
3510     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
3511         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3512 
3513     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3514 
3515     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3516 
3517     const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3518     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3519         context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3520         *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3521         &vertexInputBinding, &vertexInputAttribute, true, format, vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3522         requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3523     DescriptorPoolBuilder poolBuilder;
3524     DescriptorSetUpdateBuilder updateBuilder;
3525 
3526     for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3527         poolBuilder.addType(inputBuffers[ndx]->getType());
3528 
3529     Move<VkDescriptorPool> descriptorPool;
3530     Move<VkDescriptorSet> descriptorSet;
3531 
3532     if (extraDataCount > 0)
3533     {
3534         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3535         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3536     }
3537 
3538     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
3539     {
3540         const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3541         initializeMemory(context, alloc, extraData[ndx]);
3542     }
3543 
3544     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3545     {
3546         if (inputBuffers[buffersNdx]->isImage())
3547         {
3548             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3549                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
3550                                                                  VK_IMAGE_LAYOUT_GENERAL);
3551 
3552             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3553                                       inputBuffers[buffersNdx]->getType(), &info);
3554         }
3555         else
3556         {
3557             VkDescriptorBufferInfo info =
3558                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
3559                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3560 
3561             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3562                                       inputBuffers[buffersNdx]->getType(), &info);
3563         }
3564     }
3565     updateBuilder.update(vk, device);
3566 
3567     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3568 
3569     const uint32_t subgroupSize = getSubgroupSize(context);
3570 
3571     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3572 
3573     const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3574     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3575 
3576     unsigned totalIterations  = 0u;
3577     unsigned failedIterations = 0u;
3578 
3579     Image discardableImage(context, maxWidth, 1u, format,
3580                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3581 
3582     {
3583         const Allocation &alloc = vertexBuffer.getAllocation();
3584         std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3585         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
3586         float leftHandPosition = -1.0f;
3587 
3588         for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
3589         {
3590             data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3591             leftHandPosition += pixelSize;
3592         }
3593 
3594         deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3595         flushAlloc(vk, device, alloc);
3596     }
3597 
3598     const Unique<VkFramebuffer> framebuffer(
3599         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3600     const VkViewport viewport              = makeViewport(maxWidth, 1u);
3601     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3602     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3603     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3604     const VkDeviceSize vertexBufferOffset = 0u;
3605 
3606     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3607     {
3608         totalIterations++;
3609 
3610         for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3611         {
3612             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3613             initializeMemory(context, alloc, extraData[ndx]);
3614         }
3615 
3616         beginCommandBuffer(vk, *cmdBuffer);
3617         {
3618             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3619 
3620             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3621 
3622             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3623 
3624             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3625 
3626             if (extraDataCount > 0)
3627             {
3628                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3629                                          &descriptorSet.get(), 0u, DE_NULL);
3630             }
3631 
3632             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3633 
3634             vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3635 
3636             endRenderPass(vk, *cmdBuffer);
3637 
3638             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
3639                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3640                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3641 
3642             endCommandBuffer(vk, *cmdBuffer);
3643 
3644             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3645         }
3646         context.resetCommandPoolForVKSC(device, *cmdPool);
3647 
3648         {
3649             const Allocation &allocResult = imageBufferResult.getAllocation();
3650             invalidateAlloc(vk, device, allocResult);
3651 
3652             std::vector<const void *> datas;
3653             datas.push_back(allocResult.getHostPtr());
3654             if (!checkResult(internalData, datas, width, subgroupSize))
3655                 failedIterations++;
3656         }
3657     }
3658 
3659     if (0 < failedIterations)
3660     {
3661         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3662 
3663         context.getTestContext().getLog()
3664             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3665 
3666         return tcu::TestStatus::fail("Failed!");
3667     }
3668 
3669     return tcu::TestStatus::pass("OK");
3670 }
3671 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult)3672 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(Context &context, VkFormat format,
3673                                                             const SSBOData *extraDatas, uint32_t extraDatasCount,
3674                                                             const void *internalData, CheckResultFragment checkResult)
3675 {
3676     return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData,
3677                                                            checkResult, 0u, 0u);
3678 }
3679 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3680 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3681     Context &context, VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount, const void *internalData,
3682     CheckResultFragment checkResult, const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize)
3683 {
3684     const DeviceInterface &vk       = context.getDeviceInterface();
3685     const VkDevice device           = context.getDevice();
3686     const VkQueue queue             = context.getUniversalQueue();
3687     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3688     const Unique<VkShaderModule> vertexShaderModule(
3689         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3690     const Unique<VkShaderModule> fragmentShaderModule(
3691         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3692     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDatasCount);
3693 
3694     for (uint32_t i = 0; i < extraDatasCount; i++)
3695     {
3696         if (extraDatas[i].isImage())
3697         {
3698             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3699                 new Image(context, static_cast<uint32_t>(extraDatas[i].numElements), 1, extraDatas[i].format));
3700         }
3701         else
3702         {
3703             DE_ASSERT(extraDatas[i].isUBO());
3704 
3705             const vk::VkDeviceSize size =
3706                 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3707 
3708             inputBuffers[i] =
3709                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3710         }
3711 
3712         const Allocation &alloc = inputBuffers[i]->getAllocation();
3713 
3714         initializeMemory(context, alloc, extraDatas[i]);
3715     }
3716 
3717     DescriptorSetLayoutBuilder layoutBuilder;
3718 
3719     for (uint32_t i = 0; i < extraDatasCount; i++)
3720     {
3721         layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3722     }
3723 
3724     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3725     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3726     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3727     const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3728     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3729         context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3730         *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, DE_NULL,
3731         DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT, 0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags,
3732         requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3733     DescriptorPoolBuilder poolBuilder;
3734 
3735     // To stop validation complaining, always add at least one type to pool.
3736     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3737     for (uint32_t i = 0; i < extraDatasCount; i++)
3738     {
3739         poolBuilder.addType(inputBuffers[i]->getType());
3740     }
3741 
3742     Move<VkDescriptorPool> descriptorPool;
3743     // Create descriptor set
3744     Move<VkDescriptorSet> descriptorSet;
3745 
3746     if (extraDatasCount > 0)
3747     {
3748         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3749 
3750         descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3751     }
3752 
3753     DescriptorSetUpdateBuilder updateBuilder;
3754 
3755     for (uint32_t i = 0; i < extraDatasCount; i++)
3756     {
3757         if (inputBuffers[i]->isImage())
3758         {
3759             const VkDescriptorImageInfo info =
3760                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3761                                         inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3762 
3763             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3764                                       inputBuffers[i]->getType(), &info);
3765         }
3766         else
3767         {
3768             const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3769                 inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3770 
3771             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3772                                       inputBuffers[i]->getType(), &info);
3773         }
3774     }
3775 
3776     if (extraDatasCount > 0)
3777         updateBuilder.update(vk, device);
3778 
3779     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3780     const uint32_t subgroupSize = getSubgroupSize(context);
3781     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3782     unsigned totalIterations  = 0;
3783     unsigned failedIterations = 0;
3784 
3785     for (uint32_t width = 8; width <= subgroupSize; width *= 2)
3786     {
3787         for (uint32_t height = 8; height <= subgroupSize; height *= 2)
3788         {
3789             totalIterations++;
3790 
3791             // re-init the data
3792             for (uint32_t i = 0; i < extraDatasCount; i++)
3793             {
3794                 const Allocation &alloc = inputBuffers[i]->getAllocation();
3795 
3796                 initializeMemory(context, alloc, extraDatas[i]);
3797             }
3798 
3799             const VkDeviceSize formatSize             = getFormatSizeInBytes(format);
3800             const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3801             Image resultImage(context, width, height, format,
3802                               VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3803             Buffer resultBuffer(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3804             const Unique<VkFramebuffer> framebuffer(
3805                 makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3806             VkViewport viewport = makeViewport(width, height);
3807             VkRect2D scissor    = {{0, 0}, {width, height}};
3808 
3809             beginCommandBuffer(vk, *cmdBuffer);
3810 
3811             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3812 
3813             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3814 
3815             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height),
3816                             tcu::Vec4(0.0f));
3817 
3818             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3819 
3820             if (extraDatasCount > 0)
3821                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3822                                          &descriptorSet.get(), 0u, DE_NULL);
3823 
3824             vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3825 
3826             endRenderPass(vk, *cmdBuffer);
3827 
3828             copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(),
3829                               tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3830                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3831 
3832             endCommandBuffer(vk, *cmdBuffer);
3833 
3834             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3835 
3836             std::vector<const void *> datas;
3837             {
3838                 const Allocation &resultAlloc = resultBuffer.getAllocation();
3839                 invalidateAlloc(vk, device, resultAlloc);
3840 
3841                 // we always have our result data first
3842                 datas.push_back(resultAlloc.getHostPtr());
3843             }
3844 
3845             if (!checkResult(internalData, datas, width, height, subgroupSize))
3846             {
3847                 failedIterations++;
3848             }
3849 
3850             context.resetCommandPoolForVKSC(device, *cmdPool);
3851         }
3852     }
3853 
3854     if (0 < failedIterations)
3855     {
3856         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3857 
3858         context.getTestContext().getLog()
3859             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3860 
3861         return tcu::TestStatus::fail("Failed!");
3862     }
3863 
3864     return tcu::TestStatus::pass("OK");
3865 }
3866 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize)3867 Move<VkPipeline> makeComputePipeline(Context &context, const VkPipelineLayout pipelineLayout,
3868                                      const VkShaderModule shaderModule, const uint32_t pipelineShaderStageFlags,
3869                                      const uint32_t pipelineCreateFlags, VkPipeline basePipelineHandle,
3870                                      uint32_t localSizeX, uint32_t localSizeY, uint32_t localSizeZ,
3871                                      uint32_t requiredSubgroupSize)
3872 {
3873     const uint32_t localSize[3]                   = {localSizeX, localSizeY, localSizeZ};
3874     const vk::VkSpecializationMapEntry entries[3] = {
3875         {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3876         {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3877         {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3878     };
3879     const vk::VkSpecializationInfo info                                                     = {/* mapEntryCount = */ 3,
3880                                            /* pMapEntries   = */ entries,
3881                                            /* dataSize      = */ sizeof(localSize),
3882                                            /* pData         = */ localSize};
3883     const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3884         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
3885         DE_NULL,                                                                        // void*              pNext;
3886         requiredSubgroupSize // uint32_t           requiredSubgroupSize;
3887     };
3888     const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3889         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,              // VkStructureType sType;
3890         (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3891         pipelineShaderStageFlags,                                         // VkPipelineShaderStageCreateFlags flags;
3892         VK_SHADER_STAGE_COMPUTE_BIT,                                      // VkShaderStageFlagBits stage;
3893         shaderModule,                                                     // VkShaderModule module;
3894         "main",                                                           // const char* pName;
3895         &info, // const VkSpecializationInfo* pSpecializationInfo;
3896     };
3897     const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
3898         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3899         DE_NULL,                                        // const void* pNext;
3900         pipelineCreateFlags,                            // VkPipelineCreateFlags flags;
3901         pipelineShaderStageParams,                      // VkPipelineShaderStageCreateInfo stage;
3902         pipelineLayout,                                 // VkPipelineLayout layout;
3903 #ifndef CTS_USES_VULKANSC
3904         basePipelineHandle, // VkPipeline basePipelineHandle;
3905         -1,                 // int32_t basePipelineIndex;
3906 #else
3907         DE_NULL, // VkPipeline basePipelineHandle;
3908         0,       // int32_t basePipelineIndex;
3909 #endif // CTS_USES_VULKANSC
3910     };
3911     static_cast<void>(basePipelineHandle);
3912 
3913     return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3914 }
3915 
3916 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize,const VkRenderPass renderPass)3917 Move<VkPipeline> makeMeshPipeline(Context &context, const VkPipelineLayout pipelineLayout,
3918                                   const VkShaderModule taskModule, const VkShaderModule meshModule,
3919                                   const uint32_t pipelineShaderStageFlags, const uint32_t pipelineCreateFlags,
3920                                   VkPipeline basePipelineHandle, uint32_t localSizeX, uint32_t localSizeY,
3921                                   uint32_t localSizeZ, uint32_t requiredSubgroupSize, const VkRenderPass renderPass)
3922 {
3923     const uint32_t localSize[3]                   = {localSizeX, localSizeY, localSizeZ};
3924     const vk::VkSpecializationMapEntry entries[3] = {
3925         {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3926         {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3927         {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3928     };
3929     const vk::VkSpecializationInfo info                                                     = {/* mapEntryCount = */ 3,
3930                                            /* pMapEntries   = */ entries,
3931                                            /* dataSize      = */ sizeof(localSize),
3932                                            /* pData         = */ localSize};
3933     const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3934         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
3935         DE_NULL,                                                                        // void*              pNext;
3936         requiredSubgroupSize // uint32_t           requiredSubgroupSize;
3937     };
3938 
3939     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *pSubgroupSizeCreateInfo =
3940         ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3941 
3942     std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3943     vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3944         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3945         nullptr,                                             // const void* pNext;
3946         pipelineShaderStageFlags,                            // VkPipelineShaderStageCreateFlags flags;
3947         VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,                  // VkShaderStageFlagBits stage;
3948         DE_NULL,                                             // VkShaderModule module;
3949         "main",                                              // const char* pName;
3950         &info,                                               // const VkSpecializationInfo* pSpecializationInfo;
3951     };
3952 
3953     if (taskModule != DE_NULL)
3954     {
3955         pipelineShaderStageParams.module = taskModule;
3956         pipelineShaderStageParams.pNext  = pSubgroupSizeCreateInfo;
3957         pipelineShaderStageParams.stage  = VK_SHADER_STAGE_TASK_BIT_EXT;
3958         shaderStageParams.push_back(pipelineShaderStageParams);
3959     }
3960 
3961     if (meshModule != DE_NULL)
3962     {
3963         pipelineShaderStageParams.module = meshModule;
3964         pipelineShaderStageParams.pNext  = ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
3965         pipelineShaderStageParams.stage  = VK_SHADER_STAGE_MESH_BIT_EXT;
3966         shaderStageParams.push_back(pipelineShaderStageParams);
3967     }
3968 
3969     const std::vector<VkViewport> viewports(1u, makeViewport(1u, 1u));
3970     const std::vector<VkRect2D> scissors(1u, makeRect2D(1u, 1u));
3971 
3972     return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout,
3973                                 pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3974 }
3975 #endif // CTS_USES_VULKANSC
3976 
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)3977 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize(
3978     ComputeLike testType, Context &context, VkFormat format, const vkt::subgroups::SSBOData *inputs,
3979     uint32_t inputsCount, const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
3980     const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3], const bool isRequiredSubgroupSize,
3981     const uint32_t subgroupSize, const uint32_t localSizesToTest[][3], const uint32_t localSizesToTestCount)
3982 {
3983     const DeviceInterface &vk       = context.getDeviceInterface();
3984     const VkDevice device           = context.getDevice();
3985     const VkQueue queue             = context.getUniversalQueue();
3986     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3987 #ifndef CTS_USES_VULKANSC
3988     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
3989         context.getSubgroupSizeControlProperties();
3990 #else
3991     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
3992         context.getSubgroupSizeControlPropertiesEXT();
3993 #endif // CTS_USES_VULKANSC
3994     const VkDeviceSize elementSize     = getFormatSizeInBytes(format);
3995     const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize ? deMax32(subgroupSizeControlProperties.maxSubgroupSize,
3996                                                                           vkt::subgroups::maxSupportedSubgroupSize()) :
3997                                                                   vkt::subgroups::maxSupportedSubgroupSize();
3998     const VkDeviceSize resultBufferSize        = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
3999     const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
4000     Buffer resultBuffer(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4001     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(inputsCount);
4002     const auto shaderStageFlags =
4003         ((testType == ComputeLike::COMPUTE) ? VK_SHADER_STAGE_COMPUTE_BIT
4004 #ifndef CTS_USES_VULKANSC
4005                                               :
4006                                               (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4007 #else
4008                                               :
4009                                               0);
4010 #endif // CTS_USES_VULKANSC
4011     const auto pipelineBindPoint =
4012         ((testType == ComputeLike::COMPUTE) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
4013     const auto pipelineStage = ((testType == ComputeLike::COMPUTE) ?
4014                                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4015 #ifndef CTS_USES_VULKANSC
4016                                     :
4017                                     (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4018 #else
4019                                     :
4020                                     0);
4021 #endif // CTS_USES_VULKANSC
4022     const auto renderArea = makeRect2D(1u, 1u);
4023 
4024     std::vector<tcu::UVec3> usedLocalSizes;
4025     for (uint32_t i = 0; i < localSizesToTestCount; ++i)
4026     {
4027         usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4028     }
4029 
4030     for (uint32_t i = 0; i < inputsCount; i++)
4031     {
4032         if (inputs[i].isImage())
4033         {
4034             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
4035                 new Image(context, static_cast<uint32_t>(inputs[i].numElements), 1, inputs[i].format));
4036         }
4037         else
4038         {
4039             const auto usage =
4040                 (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4041             const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4042             inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4043         }
4044 
4045         const Allocation &alloc = inputBuffers[i]->getAllocation();
4046 
4047         initializeMemory(context, alloc, inputs[i]);
4048     }
4049 
4050     DescriptorSetLayoutBuilder layoutBuilder;
4051     layoutBuilder.addBinding(resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4052 
4053     for (uint32_t i = 0; i < inputsCount; i++)
4054     {
4055         layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4056     }
4057 
4058     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
4059 
4060     Move<VkShaderModule> compShader;
4061     Move<VkShaderModule> meshShader;
4062     Move<VkShaderModule> taskShader;
4063     const auto &binaries = context.getBinaryCollection();
4064 
4065     if (testType == ComputeLike::COMPUTE)
4066     {
4067         compShader = createShaderModule(vk, device, binaries.get("comp"));
4068     }
4069     else if (testType == ComputeLike::MESH)
4070     {
4071         meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4072         if (binaries.contains("task"))
4073             taskShader = createShaderModule(vk, device, binaries.get("task"));
4074     }
4075     else
4076     {
4077         DE_ASSERT(false);
4078     }
4079 
4080     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
4081 
4082     DescriptorPoolBuilder poolBuilder;
4083 
4084     poolBuilder.addType(resultBuffer.getType());
4085 
4086     for (uint32_t i = 0; i < inputsCount; i++)
4087     {
4088         poolBuilder.addType(inputBuffers[i]->getType());
4089     }
4090 
4091     const Unique<VkDescriptorPool> descriptorPool(
4092         poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4093     const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4094     const VkDescriptorBufferInfo resultDescriptorInfo =
4095         makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4096     DescriptorSetUpdateBuilder updateBuilder;
4097 
4098     updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4099                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4100 
4101     for (uint32_t i = 0; i < inputsCount; i++)
4102     {
4103         if (inputBuffers[i]->isImage())
4104         {
4105             const VkDescriptorImageInfo info =
4106                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
4107                                         inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4108 
4109             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
4110                                       inputBuffers[i]->getType(), &info);
4111         }
4112         else
4113         {
4114             vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4115             VkDescriptorBufferInfo info =
4116                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4117 
4118             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
4119                                       inputBuffers[i]->getType(), &info);
4120         }
4121     }
4122 
4123     updateBuilder.update(vk, device);
4124 
4125     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
4126     unsigned totalIterations  = 0;
4127     unsigned failedIterations = 0;
4128     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
4129     std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
4130     const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
4131     Move<VkRenderPass> renderPass;
4132     Move<VkFramebuffer> framebuffer;
4133 
4134     if (testType == ComputeLike::MESH)
4135     {
4136         renderPass  = makeRenderPass(vk, device);
4137         framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width,
4138                                       renderArea.extent.height);
4139     }
4140 
4141     context.getTestContext().touchWatchdog();
4142     {
4143         if (testType == ComputeLike::COMPUTE)
4144         {
4145             pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
4146                 makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
4147 #ifndef CTS_USES_VULKANSC
4148                                     VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4149 #else
4150                                     0u,
4151 #endif // CTS_USES_VULKANSC
4152                                     (VkPipeline)DE_NULL, usedLocalSizes[0][0], usedLocalSizes[0][1],
4153                                     usedLocalSizes[0][2], reqSubgroupSize)));
4154         }
4155 #ifndef CTS_USES_VULKANSC
4156         else if (testType == ComputeLike::MESH)
4157         {
4158             pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
4159                 context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
4160                 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, DE_NULL, usedLocalSizes[0][0], usedLocalSizes[0][1],
4161                 usedLocalSizes[0][2], reqSubgroupSize, renderPass.get())));
4162         }
4163 #endif // CTS_USES_VULKANSC
4164         else
4165         {
4166             DE_ASSERT(false);
4167         }
4168     }
4169     context.getTestContext().touchWatchdog();
4170 
4171     for (uint32_t index = 1; index < (localSizesToTestCount - 1); index++)
4172     {
4173         const uint32_t nextX = usedLocalSizes[index][0];
4174         const uint32_t nextY = usedLocalSizes[index][1];
4175         const uint32_t nextZ = usedLocalSizes[index][2];
4176 
4177         context.getTestContext().touchWatchdog();
4178         {
4179             if (testType == ComputeLike::COMPUTE)
4180             {
4181                 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
4182                     makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
4183 #ifndef CTS_USES_VULKANSC
4184                                         VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4185 #else
4186                                         0u,
4187 #endif // CTS_USES_VULKANSC
4188                                         **pipelines[0], nextX, nextY, nextZ, reqSubgroupSize)));
4189             }
4190 #ifndef CTS_USES_VULKANSC
4191             else if (testType == ComputeLike::MESH)
4192             {
4193                 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
4194                     context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
4195                     VK_PIPELINE_CREATE_DERIVATIVE_BIT, pipelines[0].get()->get(), nextX, nextY, nextZ, reqSubgroupSize,
4196                     renderPass.get())));
4197             }
4198 #endif // CTS_USES_VULKANSC
4199             else
4200             {
4201                 DE_ASSERT(false);
4202             }
4203         }
4204         context.getTestContext().touchWatchdog();
4205     }
4206 
4207     for (uint32_t index = 0; index < (localSizesToTestCount - 1); index++)
4208     {
4209         // we are running one test
4210         totalIterations++;
4211 
4212         beginCommandBuffer(vk, *cmdBuffer);
4213         {
4214             if (testType == ComputeLike::MESH)
4215                 beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4216 
4217             vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4218 
4219             vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u,
4220                                      DE_NULL);
4221 
4222             if (testType == ComputeLike::COMPUTE)
4223                 vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4224 #ifndef CTS_USES_VULKANSC
4225             else if (testType == ComputeLike::MESH)
4226                 vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4227                 //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4228 #endif // CTS_USES_VULKANSC
4229             else
4230                 DE_ASSERT(false);
4231 
4232             if (testType == ComputeLike::MESH)
4233                 endRenderPass(vk, *cmdBuffer);
4234         }
4235 
4236         // Make shader writes available.
4237         const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4238         vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u,
4239                               nullptr, 0u, nullptr);
4240 
4241         endCommandBuffer(vk, *cmdBuffer);
4242 
4243         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4244 
4245         std::vector<const void *> datas;
4246 
4247         {
4248             const Allocation &resultAlloc = resultBuffer.getAllocation();
4249             invalidateAlloc(vk, device, resultAlloc);
4250 
4251             // we always have our result data first
4252             datas.push_back(resultAlloc.getHostPtr());
4253         }
4254 
4255         for (uint32_t i = 0; i < inputsCount; i++)
4256         {
4257             if (!inputBuffers[i]->isImage())
4258             {
4259                 const Allocation &resultAlloc = inputBuffers[i]->getAllocation();
4260                 invalidateAlloc(vk, device, resultAlloc);
4261 
4262                 // we always have our result data first
4263                 datas.push_back(resultAlloc.getHostPtr());
4264             }
4265         }
4266 
4267         if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4268         {
4269             failedIterations++;
4270         }
4271 
4272         context.resetCommandPoolForVKSC(device, *cmdPool);
4273     }
4274 
4275     if (0 < failedIterations)
4276     {
4277         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4278 
4279         context.getTestContext().getLog()
4280             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
4281 
4282         return tcu::TestStatus::fail("Failed!");
4283     }
4284 
4285     return tcu::TestStatus::pass("OK");
4286 }
4287 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4288 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
4289     Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4290     CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4291     const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4292     const uint32_t localSizesToTestCount)
4293 {
4294     return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::COMPUTE, context, format, inputs, inputsCount,
4295                                                      internalData, checkResult, pipelineShaderStageCreateFlags,
4296                                                      numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4297                                                      localSizesToTest, localSizesToTestCount);
4298 }
4299 
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4300 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize(
4301     Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4302     CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4303     const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4304     const uint32_t localSizesToTestCount)
4305 {
4306     return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::MESH, context, format, inputs, inputsCount,
4307                                                      internalData, checkResult, pipelineShaderStageCreateFlags,
4308                                                      numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4309                                                      localSizesToTest, localSizesToTestCount);
4310 }
4311 
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4312 tcu::TestStatus makeComputeOrMeshTest(ComputeLike testType, Context &context, VkFormat format,
4313                                       const vkt::subgroups::SSBOData *inputs, uint32_t inputsCount,
4314                                       const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
4315                                       uint32_t requiredSubgroupSize, const uint32_t pipelineShaderStageCreateFlags)
4316 {
4317     const uint32_t numWorkgroups[3]   = {4, 2, 2};
4318     const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4319     const uint32_t subgroupSize =
4320         (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4321 
4322     const uint32_t localSizesToTestCount                = 8;
4323     uint32_t localSizesToTest[localSizesToTestCount][3] = {
4324         {1, 1, 1}, {subgroupSize, 1, 1}, {1, subgroupSize, 1}, {1, 1, subgroupSize}, {32, 4, 1}, {1, 4, 32}, {3, 5, 7},
4325         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4326     };
4327 
4328     if (testType == ComputeLike::COMPUTE)
4329         return makeComputeTestRequiredSubgroupSize(
4330             context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4331             numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4332     else
4333         return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult,
4334                                                 pipelineShaderStageCreateFlags, numWorkgroups, isRequiredSubgroupSize,
4335                                                 subgroupSize, localSizesToTest, localSizesToTestCount);
4336 }
4337 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4338 tcu::TestStatus vkt::subgroups::makeComputeTest(Context &context, VkFormat format, const SSBOData *inputs,
4339                                                 uint32_t inputsCount, const void *internalData,
4340                                                 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4341                                                 const uint32_t pipelineShaderStageCreateFlags)
4342 {
4343     return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult,
4344                                  requiredSubgroupSize, pipelineShaderStageCreateFlags);
4345 }
4346 
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4347 tcu::TestStatus vkt::subgroups::makeMeshTest(Context &context, VkFormat format, const SSBOData *inputs,
4348                                              uint32_t inputsCount, const void *internalData,
4349                                              CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4350                                              const uint32_t pipelineShaderStageCreateFlags)
4351 {
4352     return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult,
4353                                  requiredSubgroupSize, pipelineShaderStageCreateFlags);
4354 }
4355 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4356 static inline void checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)
4357 {
4358     if (shaderStages == 0)
4359         TCU_THROW(InternalError, "Shader stage is not specified");
4360 
4361     // It can actually be only 1 or 0.
4362     const uint32_t exclusivePipelinesCount =
4363         (isAllComputeStages(shaderStages) ? 1 : 0) + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4364 #ifndef CTS_USES_VULKANSC
4365         + (isAllRayTracingStages(shaderStages) ? 1 : 0) + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4366 #endif // CTS_USES_VULKANSC
4367         ;
4368 
4369     if (exclusivePipelinesCount != 1)
4370         TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4371 }
4372 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4373 void vkt::subgroups::supportedCheckShader(Context &context, const VkShaderStageFlags shaderStages)
4374 {
4375     checkShaderStageSetValidity(shaderStages);
4376 
4377     if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4378     {
4379         if (isAllComputeStages(shaderStages))
4380             TCU_FAIL("Compute shader is required to support subgroup operations");
4381         else
4382             TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4383     }
4384 
4385 #ifndef CTS_USES_VULKANSC
4386     if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4387         context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4388         !context.getPortabilitySubsetFeatures().tessellationIsolines)
4389     {
4390         TCU_THROW(NotSupportedError,
4391                   "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4392     }
4393 #endif // CTS_USES_VULKANSC
4394 }
4395 
4396 namespace vkt
4397 {
4398 namespace subgroups
4399 {
4400 typedef std::vector<de::SharedPtr<BufferOrImage>> vectorBufferOrImage;
4401 
// Indices of the ray tracing shader groups; GROUP_COUNT is the number of groups.
enum ShaderGroups
{
    FIRST_GROUP  = 0,           // Index of the first group.
    RAYGEN_GROUP = FIRST_GROUP, // Ray generation group shares the first index.
    MISS_GROUP   = 1,
    HIT_GROUP    = 2,
    CALL_GROUP   = 3,
    GROUP_COUNT  = 4
};
4411 
getAllRayTracingFormats()4412 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4413 {
4414     std::vector<VkFormat> formats;
4415 
4416     formats.push_back(VK_FORMAT_R8G8B8_SINT);
4417     formats.push_back(VK_FORMAT_R8_UINT);
4418     formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4419     formats.push_back(VK_FORMAT_R16G16B16_SINT);
4420     formats.push_back(VK_FORMAT_R16_UINT);
4421     formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4422     formats.push_back(VK_FORMAT_R32G32B32_SINT);
4423     formats.push_back(VK_FORMAT_R32_UINT);
4424     formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4425     formats.push_back(VK_FORMAT_R64G64B64_SINT);
4426     formats.push_back(VK_FORMAT_R64_UINT);
4427     formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4428     formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4429     formats.push_back(VK_FORMAT_R32_SFLOAT);
4430     formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4431     formats.push_back(VK_FORMAT_R64_SFLOAT);
4432     formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4433     formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4434     formats.push_back(VK_FORMAT_R8_USCALED);
4435     formats.push_back(VK_FORMAT_R8G8_USCALED);
4436     formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4437     formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4438 
4439     return formats;
4440 }
4441 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4442 void addRayTracingNoSubgroupShader(SourceCollections &programCollection)
4443 {
4444     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4445 
4446     const std::string rgenShaderNoSubgroups =
4447         "#version 460 core\n"
4448         "#extension GL_EXT_ray_tracing: require\n"
4449         "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4450         "layout(location = 0) callableDataEXT uvec4 callData;"
4451         "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4452         "\n"
4453         "void main()\n"
4454         "{\n"
4455         "  uint  rayFlags   = 0;\n"
4456         "  uint  cullMask   = 0xFF;\n"
4457         "  float tmin       = 0.0;\n"
4458         "  float tmax       = 9.0;\n"
4459         "  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
4460         "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4461         "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4462         "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4463         "\n"
4464         "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4465         "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4466         "  executeCallableEXT(0, 0);"
4467         "}\n";
4468     const std::string hitShaderNoSubgroups  = "#version 460 core\n"
4469                                               "#extension GL_EXT_ray_tracing: require\n"
4470                                               "hitAttributeEXT vec3 attribs;\n"
4471                                               "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4472                                               "\n"
4473                                               "void main()\n"
4474                                               "{\n"
4475                                               "}\n";
4476     const std::string missShaderNoSubgroups = "#version 460 core\n"
4477                                               "#extension GL_EXT_ray_tracing: require\n"
4478                                               "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4479                                               "\n"
4480                                               "void main()\n"
4481                                               "{\n"
4482                                               "}\n";
4483     const std::string sectShaderNoSubgroups = "#version 460 core\n"
4484                                               "#extension GL_EXT_ray_tracing: require\n"
4485                                               "hitAttributeEXT vec3 hitAttribute;\n"
4486                                               "\n"
4487                                               "void main()\n"
4488                                               "{\n"
4489                                               "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
4490                                               "}\n";
4491     const std::string callShaderNoSubgroups = "#version 460 core\n"
4492                                               "#extension GL_EXT_ray_tracing: require\n"
4493                                               "layout(location = 0) callableDataInEXT float callData;\n"
4494                                               "\n"
4495                                               "void main()\n"
4496                                               "{\n"
4497                                               "}\n";
4498 
4499     programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource(rgenShaderNoSubgroups) << buildOptions;
4500     programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource(hitShaderNoSubgroups) << buildOptions;
4501     programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource(hitShaderNoSubgroups) << buildOptions;
4502     programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource(missShaderNoSubgroups) << buildOptions;
4503     programCollection.glslSources.add("sect_noSubgroup")
4504         << glu::IntersectionSource(sectShaderNoSubgroups) << buildOptions;
4505     programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource(callShaderNoSubgroups) << buildOptions;
4506 }
4507 
4508 #ifndef CTS_USES_VULKANSC
4509 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4510 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)
4511 {
4512     vector<VkShaderStageFlagBits> result;
4513     const VkShaderStageFlagBits shaderStageFlags[] = {
4514         VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR,      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4515         VK_SHADER_STAGE_MISS_BIT_KHR,   VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4516     };
4517 
4518     for (auto shaderStageFlag : shaderStageFlags)
4519     {
4520         if (0 != (shaderStage & shaderStageFlag))
4521             result.push_back(shaderStageFlag);
4522     }
4523 
4524     return result;
4525 }
4526 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4527 static uint32_t getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)
4528 {
4529     const VkShaderStageFlags shaderStageFlags[] = {
4530         VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR,      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4531         VK_SHADER_STAGE_MISS_BIT_KHR,   VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4532     };
4533 
4534     for (uint32_t shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4535     {
4536         if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4537         {
4538             DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4539 
4540             return shaderStageNdx;
4541         }
4542     }
4543 
4544     TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4545 }
4546 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4547 static vectorBufferOrImage makeRayTracingInputBuffers(Context &context, VkFormat format, const SSBOData *extraDatas,
4548                                                       uint32_t extraDatasCount,
4549                                                       const vector<VkShaderStageFlagBits> &stagesVector)
4550 {
4551     const size_t stagesCount           = stagesVector.size();
4552     const VkDeviceSize shaderSize      = getMaxWidth();
4553     const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4554     vectorBufferOrImage inputBuffers(stagesCount + extraDatasCount);
4555 
4556     // The implicit result SSBO we use to store our outputs from the shader
4557     for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4558         inputBuffers[stageNdx] =
4559             de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4560 
4561     for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4562     {
4563         const size_t datasNdx = stageNdx - stagesCount;
4564 
4565         if (extraDatas[datasNdx].isImage())
4566         {
4567             inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(
4568                 context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4569         }
4570         else
4571         {
4572             const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
4573                                                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4574             const auto size  = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
4575                               extraDatas[datasNdx].numElements;
4576             inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4577         }
4578 
4579         initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4580     }
4581 
4582     return inputBuffers;
4583 }
4584 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4585 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout(Context &context, const SSBOData *extraDatas,
4586                                                                      uint32_t extraDatasCount,
4587                                                                      const vector<VkShaderStageFlagBits> &stagesVector,
4588                                                                      const vectorBufferOrImage &inputBuffers)
4589 {
4590     const DeviceInterface &vkd = context.getDeviceInterface();
4591     const VkDevice device      = context.getDevice();
4592     const size_t stagesCount   = stagesVector.size();
4593     DescriptorSetLayoutBuilder layoutBuilder;
4594 
4595     // The implicit result SSBO we use to store our outputs from the shader
4596     for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4597     {
4598         const uint32_t stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4599 
4600         layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding,
4601                                         DE_NULL);
4602     }
4603 
4604     for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4605     {
4606         const size_t datasNdx = stageNdx - stagesCount;
4607 
4608         layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages,
4609                                         extraDatas[datasNdx].binding, DE_NULL);
4610     }
4611 
4612     return layoutBuilder.build(vkd, device);
4613 }
4614 
makeRayTracingDescriptorSetLayoutAS(Context & context)4615 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS(Context &context)
4616 {
4617     const DeviceInterface &vkd = context.getDeviceInterface();
4618     const VkDevice device      = context.getDevice();
4619     DescriptorSetLayoutBuilder layoutBuilder;
4620 
4621     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4622 
4623     return layoutBuilder.build(vkd, device);
4624 }
4625 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4626 static Move<VkDescriptorPool> makeRayTracingDescriptorPool(Context &context, const vectorBufferOrImage &inputBuffers)
4627 {
4628     const DeviceInterface &vkd       = context.getDeviceInterface();
4629     const VkDevice device            = context.getDevice();
4630     const uint32_t maxDescriptorSets = 2u;
4631     DescriptorPoolBuilder poolBuilder;
4632     Move<VkDescriptorPool> result;
4633 
4634     if (inputBuffers.size() > 0)
4635     {
4636         for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4637             poolBuilder.addType(inputBuffers[ndx]->getType());
4638     }
4639 
4640     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4641 
4642     result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4643 
4644     return result;
4645 }
4646 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4647 static Move<VkDescriptorSet> makeRayTracingDescriptorSet(Context &context, VkDescriptorPool descriptorPool,
4648                                                          VkDescriptorSetLayout descriptorSetLayout,
4649                                                          const SSBOData *extraDatas, uint32_t extraDatasCount,
4650                                                          const vector<VkShaderStageFlagBits> &stagesVector,
4651                                                          const vectorBufferOrImage &inputBuffers)
4652 {
4653     const DeviceInterface &vkd = context.getDeviceInterface();
4654     const VkDevice device      = context.getDevice();
4655     const size_t stagesCount   = stagesVector.size();
4656     Move<VkDescriptorSet> descriptorSet;
4657 
4658     if (inputBuffers.size() > 0)
4659     {
4660         DescriptorSetUpdateBuilder updateBuilder;
4661 
4662         // Create descriptor set
4663         descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4664 
4665         for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4666         {
4667             const uint32_t binding = (ndx < stagesCount) ? getRayTracingResultBinding(stagesVector[ndx]) :
4668                                                            extraDatas[ndx - stagesCount].binding;
4669 
4670             if (inputBuffers[ndx]->isImage())
4671             {
4672                 const VkDescriptorImageInfo info =
4673                     makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
4674                                             inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4675 
4676                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4677                                           inputBuffers[ndx]->getType(), &info);
4678             }
4679             else
4680             {
4681                 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
4682                     inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4683 
4684                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4685                                           inputBuffers[ndx]->getType(), &info);
4686             }
4687         }
4688 
4689         updateBuilder.update(vkd, device);
4690     }
4691 
4692     return descriptorSet;
4693 }
4694 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4695 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS(
4696     Context &context, VkDescriptorPool descriptorPool, VkDescriptorSetLayout descriptorSetLayout,
4697     de::MovePtr<TopLevelAccelerationStructure> &topLevelAccelerationStructure)
4698 {
4699     const DeviceInterface &vkd                                            = context.getDeviceInterface();
4700     const VkDevice device                                                 = context.getDevice();
4701     const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4702     const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
4703         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
4704         DE_NULL,                                                           //  const void* pNext;
4705         1u,                                                                //  uint32_t accelerationStructureCount;
4706         topLevelAccelerationStructurePtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
4707     };
4708     Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4709 
4710     DescriptorSetUpdateBuilder()
4711         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4712                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4713         .update(vkd, device);
4714 
4715     return descriptorSet;
4716 }
4717 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4718 static Move<VkPipelineLayout> makeRayTracingPipelineLayout(Context &context,
4719                                                            const VkDescriptorSetLayout descriptorSetLayout0,
4720                                                            const VkDescriptorSetLayout descriptorSetLayout1)
4721 {
4722     const DeviceInterface &vkd = context.getDeviceInterface();
4723     const VkDevice device      = context.getDevice();
4724     const std::vector<VkDescriptorSetLayout> descriptorSetLayouts{descriptorSetLayout0, descriptorSetLayout1};
4725     const uint32_t descriptorSetLayoutsSize = static_cast<uint32_t>(descriptorSetLayouts.size());
4726 
4727     return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4728 }
4729 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4730 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure(
4731     Context &context, de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4732 {
4733     const DeviceInterface &vkd                        = context.getDeviceInterface();
4734     const VkDevice device                             = context.getDevice();
4735     Allocator &allocator                              = context.getDefaultAllocator();
4736     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4737 
4738     result->setInstanceCount(1);
4739     result->addInstance(bottomLevelAccelerationStructure);
4740     result->create(vkd, device, allocator);
4741 
4742     return result;
4743 }
4744 
createBottomAccelerationStructure(Context & context)4745 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure(Context &context)
4746 {
4747     const DeviceInterface &vkd                           = context.getDeviceInterface();
4748     const VkDevice device                                = context.getDevice();
4749     Allocator &allocator                                 = context.getDefaultAllocator();
4750     de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4751     const std::vector<tcu::Vec3> geometryData{tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f)};
4752 
4753     result->setGeometryCount(1u);
4754     result->addGeometry(geometryData, false);
4755     result->create(vkd, device, allocator, 0u);
4756 
4757     return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4758 }
4759 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const uint32_t shaderStageCreateFlags[6],const uint32_t requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4760 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline(
4761     Context &context, const VkShaderStageFlags shaderStageTested, const VkPipelineLayout pipelineLayout,
4762     const uint32_t shaderStageCreateFlags[6], const uint32_t requiredSubgroupSize[6], Move<VkPipeline> &pipelineOut)
4763 {
4764     const DeviceInterface &vkd   = context.getDeviceInterface();
4765     const VkDevice device        = context.getDevice();
4766     BinaryCollection &collection = context.getBinaryCollection();
4767     const char *shaderRgenName =
4768         (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
4769     const char *shaderAhitName =
4770         (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
4771     const char *shaderChitName =
4772         (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
4773     const char *shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
4774     const char *shaderSectName =
4775         (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
4776     const char *shaderCallName =
4777         (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
4778     const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
4779     Move<VkShaderModule> rgenShaderModule =
4780         createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4781     Move<VkShaderModule> ahitShaderModule =
4782         createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4783     Move<VkShaderModule> chitShaderModule =
4784         createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4785     Move<VkShaderModule> missShaderModule =
4786         createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4787     Move<VkShaderModule> sectShaderModule =
4788         createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4789     Move<VkShaderModule> callShaderModule =
4790         createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4791     const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags =
4792         static_cast<VkPipelineShaderStageCreateFlags>(0);
4793     const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags =
4794         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4795     const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags =
4796         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4797     const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags =
4798         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4799     const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags =
4800         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4801     const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags =
4802         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4803     const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags =
4804         (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4805     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] = {
4806         {
4807             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4808             DE_NULL,
4809             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4810         },
4811         {
4812             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4813             DE_NULL,
4814             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4815         },
4816         {
4817             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4818             DE_NULL,
4819             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4820         },
4821         {
4822             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4823             DE_NULL,
4824             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4825         },
4826         {
4827             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4828             DE_NULL,
4829             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4830         },
4831         {
4832             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4833             DE_NULL,
4834             requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4835         },
4836     };
4837     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rgenRequiredSubgroupSizeCreateInfo =
4838         (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4839     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *ahitRequiredSubgroupSizeCreateInfo =
4840         (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4841     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *chitRequiredSubgroupSizeCreateInfo =
4842         (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4843     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *missRequiredSubgroupSizeCreateInfo =
4844         (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4845     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *sectRequiredSubgroupSizeCreateInfo =
4846         (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4847     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *callRequiredSubgroupSizeCreateInfo =
4848         (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4849     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
4850 
4851     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenShaderModule, RAYGEN_GROUP, DE_NULL,
4852                                   rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4853     rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, ahitShaderModule, HIT_GROUP, DE_NULL,
4854                                   ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4855     rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitShaderModule, HIT_GROUP, DE_NULL,
4856                                   chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4857     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missShaderModule, MISS_GROUP, DE_NULL,
4858                                   missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4859     rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, sectShaderModule, HIT_GROUP, DE_NULL,
4860                                   sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4861     rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, callShaderModule, CALL_GROUP, DE_NULL,
4862                                   callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4863 
4864     // Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4865     pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4866 
4867     return rayTracingPipeline;
4868 }
4869 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4870 VkShaderStageFlags getPossibleRayTracingSubgroupStages(Context &context, const VkShaderStageFlags testedStages)
4871 {
4872     const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
4873     const VkShaderStageFlags stages                              = testedStages & subgroupProperties.supportedStages;
4874 
4875     DE_ASSERT(isAllRayTracingStages(testedStages));
4876 
4877     return stages;
4878 }
4879 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4880 tcu::TestStatus allRayTracingStages(Context &context, VkFormat format, const SSBOData *extraDatas,
4881                                     uint32_t extraDataCount, const void *internalData,
4882                                     const VerificationFunctor &checkResult, const VkShaderStageFlags shaderStage)
4883 {
4884     return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(
4885         context, format, extraDatas, extraDataCount, internalData, checkResult, shaderStage, DE_NULL, DE_NULL);
4886 }
4887 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const uint32_t shaderStageCreateFlags[6],const uint32_t requiredSubgroupSize[6])4888 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize(Context &context, VkFormat format, const SSBOData *extraDatas,
4889                                                         uint32_t extraDatasCount, const void *internalData,
4890                                                         const VerificationFunctor &checkResult,
4891                                                         const VkShaderStageFlags shaderStageTested,
4892                                                         const uint32_t shaderStageCreateFlags[6],
4893                                                         const uint32_t requiredSubgroupSize[6])
4894 {
4895     const DeviceInterface &vkd                       = context.getDeviceInterface();
4896     const VkDevice device                            = context.getDevice();
4897     const VkQueue queue                              = context.getUniversalQueue();
4898     const uint32_t queueFamilyIndex                  = context.getUniversalQueueFamilyIndex();
4899     Allocator &allocator                             = context.getDefaultAllocator();
4900     const uint32_t subgroupSize                      = getSubgroupSize(context);
4901     const uint32_t maxWidth                          = getMaxWidth();
4902     const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
4903     const uint32_t stagesCount                       = static_cast<uint32_t>(stagesVector.size());
4904     de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
4905         createBottomAccelerationStructure(context);
4906     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure =
4907         createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
4908     vectorBufferOrImage inputBuffers =
4909         makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
4910     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4911         makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4912     const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
4913     const Move<VkPipelineLayout> pipelineLayout =
4914         makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
4915     Move<VkPipeline> pipeline                                = Move<VkPipeline>();
4916     const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(
4917         context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
4918     const uint32_t shaderGroupHandleSize    = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
4919     const uint32_t shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
4920     de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4921         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
4922     de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4923         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
4924     de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4925         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
4926     de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4927         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
4928     const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion =
4929         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0),
4930                                           shaderGroupHandleSize, shaderGroupHandleSize);
4931     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
4932         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
4933                                           shaderGroupHandleSize, shaderGroupHandleSize);
4934     const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion =
4935         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0),
4936                                           shaderGroupHandleSize, shaderGroupHandleSize);
4937     const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion =
4938         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0),
4939                                           shaderGroupHandleSize, shaderGroupHandleSize);
4940     const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
4941     const Move<VkDescriptorSet> descriptorSet   = makeRayTracingDescriptorSet(
4942         context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4943     const Move<VkDescriptorSet> descriptorSetAS =
4944         makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
4945     const Move<VkCommandPool> cmdPool     = makeCommandPool(vkd, device, queueFamilyIndex);
4946     const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
4947     uint32_t passIterations               = 0u;
4948     uint32_t failIterations               = 0u;
4949 
4950     DE_ASSERT(shaderStageTested != 0);
4951 
4952     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
4953     {
4954 
4955         for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
4956         {
4957             // re-init the data
4958             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
4959 
4960             initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
4961         }
4962 
4963         beginCommandBuffer(vkd, *cmdBuffer);
4964         {
4965             vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4966 
4967             bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4968             topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4969 
4970             vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u,
4971                                       &descriptorSetAS.get(), 0u, DE_NULL);
4972 
4973             if (stagesCount + extraDatasCount > 0)
4974                 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u,
4975                                           &descriptorSet.get(), 0u, DE_NULL);
4976 
4977             cmdTraceRays(vkd, *cmdBuffer, &rgenShaderBindingTableRegion, &missShaderBindingTableRegion,
4978                          &hitsShaderBindingTableRegion, &callShaderBindingTableRegion, width, 1, 1);
4979 
4980             const VkMemoryBarrier postTraceMemoryBarrier =
4981                 makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4982             cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
4983                                      VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
4984         }
4985         endCommandBuffer(vkd, *cmdBuffer);
4986 
4987         submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
4988 
4989         for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
4990         {
4991             std::vector<const void *> datas;
4992 
4993             if (!inputBuffers[ndx]->isImage())
4994             {
4995                 const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
4996 
4997                 invalidateAlloc(vkd, device, resultAlloc);
4998 
4999                 // we always have our result data first
5000                 datas.push_back(resultAlloc.getHostPtr());
5001             }
5002 
5003             for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
5004             {
5005                 const uint32_t datasNdx = index - stagesCount;
5006 
5007                 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
5008                 {
5009                     const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
5010 
5011                     invalidateAlloc(vkd, device, resultAlloc);
5012 
5013                     // we always have our result data first
5014                     datas.push_back(resultAlloc.getHostPtr());
5015                 }
5016             }
5017 
5018             if (!checkResult(internalData, datas, width, subgroupSize, false))
5019                 failIterations++;
5020             else
5021                 passIterations++;
5022         }
5023 
5024         context.resetCommandPoolForVKSC(device, *cmdPool);
5025     }
5026 
5027     if (failIterations > 0 || passIterations == 0)
5028         return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " +
5029                                      de::toString(failIterations + passIterations) + " iterations.");
5030     else
5031         return tcu::TestStatus::pass("OK");
5032 }
5033 #endif // CTS_USES_VULKANSC
5034 
5035 } // namespace subgroups
5036 } // namespace vkt
5037