1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "tcuFloat.hpp"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
// Distinguishes the two "compute-like" pipeline flavors these utilities can
// drive: classic compute dispatch vs. mesh shading. NOTE(review): mesh path
// presumably maps to VK_EXT_mesh_shader — confirm against the users of this enum.
enum class ComputeLike
{
    COMPUTE = 0,
    MESH
};
51
// Upper bound for the test-width sweep driven by getNextWidth().
uint32_t getMaxWidth()
{
    constexpr uint32_t kMaxWidth = 1024u;
    return kMaxWidth;
}
56
// Advances the test width: every value up to 128 is covered exhaustively
// (128 being the max subgroup size); beyond that only powers of two are
// visited to keep total test time bounded.
uint32_t getNextWidth(const uint32_t width)
{
    return (width < 128) ? (width + 1) : (width * 2);
}
70
getFormatSizeInBytes(const VkFormat format)71 uint32_t getFormatSizeInBytes(const VkFormat format)
72 {
73 switch (format)
74 {
75 default:
76 DE_FATAL("Unhandled format!");
77 return 0;
78 case VK_FORMAT_R8_SINT:
79 case VK_FORMAT_R8_UINT:
80 return static_cast<uint32_t>(sizeof(int8_t));
81 case VK_FORMAT_R8G8_SINT:
82 case VK_FORMAT_R8G8_UINT:
83 return static_cast<uint32_t>(sizeof(int8_t) * 2);
84 case VK_FORMAT_R8G8B8_SINT:
85 case VK_FORMAT_R8G8B8_UINT:
86 case VK_FORMAT_R8G8B8A8_SINT:
87 case VK_FORMAT_R8G8B8A8_UINT:
88 return static_cast<uint32_t>(sizeof(int8_t) * 4);
89 case VK_FORMAT_R16_SINT:
90 case VK_FORMAT_R16_UINT:
91 case VK_FORMAT_R16_SFLOAT:
92 return static_cast<uint32_t>(sizeof(int16_t));
93 case VK_FORMAT_R16G16_SINT:
94 case VK_FORMAT_R16G16_UINT:
95 case VK_FORMAT_R16G16_SFLOAT:
96 return static_cast<uint32_t>(sizeof(int16_t) * 2);
97 case VK_FORMAT_R16G16B16_UINT:
98 case VK_FORMAT_R16G16B16_SINT:
99 case VK_FORMAT_R16G16B16_SFLOAT:
100 case VK_FORMAT_R16G16B16A16_SINT:
101 case VK_FORMAT_R16G16B16A16_UINT:
102 case VK_FORMAT_R16G16B16A16_SFLOAT:
103 return static_cast<uint32_t>(sizeof(int16_t) * 4);
104 case VK_FORMAT_R32_SINT:
105 case VK_FORMAT_R32_UINT:
106 case VK_FORMAT_R32_SFLOAT:
107 return static_cast<uint32_t>(sizeof(int32_t));
108 case VK_FORMAT_R32G32_SINT:
109 case VK_FORMAT_R32G32_UINT:
110 case VK_FORMAT_R32G32_SFLOAT:
111 return static_cast<uint32_t>(sizeof(int32_t) * 2);
112 case VK_FORMAT_R32G32B32_SINT:
113 case VK_FORMAT_R32G32B32_UINT:
114 case VK_FORMAT_R32G32B32_SFLOAT:
115 case VK_FORMAT_R32G32B32A32_SINT:
116 case VK_FORMAT_R32G32B32A32_UINT:
117 case VK_FORMAT_R32G32B32A32_SFLOAT:
118 return static_cast<uint32_t>(sizeof(int32_t) * 4);
119 case VK_FORMAT_R64_SINT:
120 case VK_FORMAT_R64_UINT:
121 case VK_FORMAT_R64_SFLOAT:
122 return static_cast<uint32_t>(sizeof(int64_t));
123 case VK_FORMAT_R64G64_SINT:
124 case VK_FORMAT_R64G64_UINT:
125 case VK_FORMAT_R64G64_SFLOAT:
126 return static_cast<uint32_t>(sizeof(int64_t) * 2);
127 case VK_FORMAT_R64G64B64_SINT:
128 case VK_FORMAT_R64G64B64_UINT:
129 case VK_FORMAT_R64G64B64_SFLOAT:
130 case VK_FORMAT_R64G64B64A64_SINT:
131 case VK_FORMAT_R64G64B64A64_UINT:
132 case VK_FORMAT_R64G64B64A64_SFLOAT:
133 return static_cast<uint32_t>(sizeof(int64_t) * 4);
134 // The below formats are used to represent bool and bvec* types. These
135 // types are passed to the shader as int and ivec* types, before the
136 // calculations are done as booleans. We need a distinct type here so
137 // that the shader generators can switch on it and generate the correct
138 // shader source for testing.
139 case VK_FORMAT_R8_USCALED:
140 return static_cast<uint32_t>(sizeof(int32_t));
141 case VK_FORMAT_R8G8_USCALED:
142 return static_cast<uint32_t>(sizeof(int32_t) * 2);
143 case VK_FORMAT_R8G8B8_USCALED:
144 case VK_FORMAT_R8G8B8A8_USCALED:
145 return static_cast<uint32_t>(sizeof(int32_t) * 4);
146 }
147 }
148
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)149 uint32_t getElementSizeInBytes(const VkFormat format, const subgroups::SSBOData::InputDataLayoutType layout)
150 {
151 const uint32_t bytes = getFormatSizeInBytes(format);
152
153 if (layout == subgroups::SSBOData::LayoutStd140)
154 return bytes < 16 ? 16 : bytes;
155 else
156 return bytes;
157 }
158
makeRenderPass(Context & context,VkFormat format)159 Move<VkRenderPass> makeRenderPass(Context &context, VkFormat format)
160 {
161 const VkAttachmentReference colorReference = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
162 const VkSubpassDescription subpassDescription = {
163 0u, // VkSubpassDescriptionFlags flags;
164 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
165 0, // uint32_t inputAttachmentCount;
166 DE_NULL, // const VkAttachmentReference* pInputAttachments;
167 1, // uint32_t colorAttachmentCount;
168 &colorReference, // const VkAttachmentReference* pColorAttachments;
169 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
170 DE_NULL, // const VkAttachmentReference* pDepthStencilAttachment;
171 0, // uint32_t preserveAttachmentCount;
172 DE_NULL // const uint32_t* pPreserveAttachments;
173 };
174 const VkSubpassDependency subpassDependencies[2] = {
175 {
176 VK_SUBPASS_EXTERNAL, // uint32_t srcSubpass;
177 0u, // uint32_t dstSubpass;
178 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags srcStageMask;
179 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags dstStageMask;
180 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags srcAccessMask;
181 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags dstAccessMask;
182 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
183 },
184 {
185 0u, // uint32_t srcSubpass;
186 VK_SUBPASS_EXTERNAL, // uint32_t dstSubpass;
187 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags srcStageMask;
188 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags dstStageMask;
189 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags srcAccessMask;
190 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags dstAccessMask;
191 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
192 },
193 };
194 const VkAttachmentDescription attachmentDescription = {
195 0u, // VkAttachmentDescriptionFlags flags;
196 format, // VkFormat format;
197 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
198 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
199 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
200 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
201 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
202 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
203 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // VkImageLayout finalLayout;
204 };
205 const VkRenderPassCreateInfo renderPassCreateInfo = {
206 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
207 DE_NULL, // const void* pNext;
208 0u, // VkRenderPassCreateFlags flags;
209 1, // uint32_t attachmentCount;
210 &attachmentDescription, // const VkAttachmentDescription* pAttachments;
211 1, // uint32_t subpassCount;
212 &subpassDescription, // const VkSubpassDescription* pSubpasses;
213 2, // uint32_t dependencyCount;
214 subpassDependencies // const VkSubpassDependency* pDependencies;
215 };
216
217 return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
218 }
219
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const uint32_t subpass,const uint32_t patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])220 Move<VkPipeline> makeGraphicsPipeline(
221 const DeviceInterface &vk, const VkDevice device, const VkPipelineLayout pipelineLayout,
222 const VkShaderModule vertexShaderModule, const VkShaderModule tessellationControlShaderModule,
223 const VkShaderModule tessellationEvalShaderModule, const VkShaderModule geometryShaderModule,
224 const VkShaderModule fragmentShaderModule, const VkRenderPass renderPass, const std::vector<VkViewport> &viewports,
225 const std::vector<VkRect2D> &scissors, const VkPrimitiveTopology topology, const uint32_t subpass,
226 const uint32_t patchControlPoints, const VkPipelineVertexInputStateCreateInfo *vertexInputStateCreateInfo,
227 const VkPipelineRasterizationStateCreateInfo *rasterizationStateCreateInfo,
228 const VkPipelineMultisampleStateCreateInfo *multisampleStateCreateInfo,
229 const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo,
230 const VkPipelineColorBlendStateCreateInfo *colorBlendStateCreateInfo,
231 const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfo, const uint32_t vertexShaderStageCreateFlags,
232 const uint32_t tessellationControlShaderStageCreateFlags, const uint32_t tessellationEvalShaderStageCreateFlags,
233 const uint32_t geometryShaderStageCreateFlags, const uint32_t fragmentShaderStageCreateFlags,
234 const uint32_t requiredSubgroupSize[5])
235 {
236 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
237 const bool hasTessellation =
238 (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
239
240 VkPipelineShaderStageCreateInfo stageCreateInfo = {
241 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
242 DE_NULL, // const void* pNext
243 0u, // VkPipelineShaderStageCreateFlags flags
244 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
245 DE_NULL, // VkShaderModule module
246 "main", // const char* pName
247 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
248 };
249
250 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
251
252 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] = {
253 {
254 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
255 DE_NULL,
256 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
257 },
258 {
259 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
260 DE_NULL,
261 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
262 },
263 {
264 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
265 DE_NULL,
266 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
267 },
268 {
269 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270 DE_NULL,
271 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
272 },
273 {
274 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275 DE_NULL,
276 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
277 },
278 };
279
280 {
281 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ?
282 &requiredSubgroupSizeCreateInfo[0] :
283 DE_NULL;
284 stageCreateInfo.flags = vertexShaderStageCreateFlags;
285 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
286 stageCreateInfo.module = vertexShaderModule;
287 pipelineShaderStageParams.push_back(stageCreateInfo);
288 }
289
290 if (tessellationControlShaderModule != DE_NULL)
291 {
292 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ?
293 &requiredSubgroupSizeCreateInfo[1] :
294 DE_NULL;
295 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
296 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
297 stageCreateInfo.module = tessellationControlShaderModule;
298 pipelineShaderStageParams.push_back(stageCreateInfo);
299 }
300
301 if (tessellationEvalShaderModule != DE_NULL)
302 {
303 stageCreateInfo.pNext =
304 (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ?
305 &requiredSubgroupSizeCreateInfo[2] :
306 DE_NULL;
307 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
308 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
309 stageCreateInfo.module = tessellationEvalShaderModule;
310 pipelineShaderStageParams.push_back(stageCreateInfo);
311 }
312
313 if (geometryShaderModule != DE_NULL)
314 {
315 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ?
316 &requiredSubgroupSizeCreateInfo[3] :
317 DE_NULL;
318 stageCreateInfo.flags = geometryShaderStageCreateFlags;
319 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
320 stageCreateInfo.module = geometryShaderModule;
321 pipelineShaderStageParams.push_back(stageCreateInfo);
322 }
323
324 if (fragmentShaderModule != DE_NULL)
325 {
326 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ?
327 &requiredSubgroupSizeCreateInfo[4] :
328 DE_NULL;
329 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
330 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
331 stageCreateInfo.module = fragmentShaderModule;
332 pipelineShaderStageParams.push_back(stageCreateInfo);
333 }
334
335 const VkVertexInputBindingDescription vertexInputBindingDescription = {
336 0u, // uint32_t binding
337 sizeof(tcu::Vec4), // uint32_t stride
338 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
339 };
340
341 const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
342 0u, // uint32_t location
343 0u, // uint32_t binding
344 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
345 0u // uint32_t offset
346 };
347
348 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault = {
349 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
350 DE_NULL, // const void* pNext
351 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
352 1u, // uint32_t vertexBindingDescriptionCount
353 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
354 1u, // uint32_t vertexAttributeDescriptionCount
355 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
356 };
357
358 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
359 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
360 DE_NULL, // const void* pNext
361 0u, // VkPipelineInputAssemblyStateCreateFlags flags
362 topology, // VkPrimitiveTopology topology
363 VK_FALSE // VkBool32 primitiveRestartEnable
364 };
365
366 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo = {
367 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
368 DE_NULL, // const void* pNext
369 0u, // VkPipelineTessellationStateCreateFlags flags
370 patchControlPoints // uint32_t patchControlPoints
371 };
372
373 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
374 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
375 DE_NULL, // const void* pNext
376 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
377 viewports.empty() ? 1u :
378 (uint32_t)viewports.size(), // uint32_t viewportCount
379 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
380 viewports.empty() ? 1u : (uint32_t)scissors.size(), // uint32_t scissorCount
381 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
382 };
383
384 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault = {
385 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
386 DE_NULL, // const void* pNext
387 0u, // VkPipelineRasterizationStateCreateFlags flags
388 VK_FALSE, // VkBool32 depthClampEnable
389 disableRasterization, // VkBool32 rasterizerDiscardEnable
390 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
391 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
392 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
393 VK_FALSE, // VkBool32 depthBiasEnable
394 0.0f, // float depthBiasConstantFactor
395 0.0f, // float depthBiasClamp
396 0.0f, // float depthBiasSlopeFactor
397 1.0f // float lineWidth
398 };
399
400 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault = {
401 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
402 DE_NULL, // const void* pNext
403 0u, // VkPipelineMultisampleStateCreateFlags flags
404 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
405 VK_FALSE, // VkBool32 sampleShadingEnable
406 1.0f, // float minSampleShading
407 DE_NULL, // const VkSampleMask* pSampleMask
408 VK_FALSE, // VkBool32 alphaToCoverageEnable
409 VK_FALSE // VkBool32 alphaToOneEnable
410 };
411
412 const VkStencilOpState stencilOpState = {
413 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
414 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
415 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
416 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
417 0, // uint32_t compareMask
418 0, // uint32_t writeMask
419 0 // uint32_t reference
420 };
421
422 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault = {
423 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
424 DE_NULL, // const void* pNext
425 0u, // VkPipelineDepthStencilStateCreateFlags flags
426 VK_FALSE, // VkBool32 depthTestEnable
427 VK_FALSE, // VkBool32 depthWriteEnable
428 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
429 VK_FALSE, // VkBool32 depthBoundsTestEnable
430 VK_FALSE, // VkBool32 stencilTestEnable
431 stencilOpState, // VkStencilOpState front
432 stencilOpState, // VkStencilOpState back
433 0.0f, // float minDepthBounds
434 1.0f, // float maxDepthBounds
435 };
436
437 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
438 VK_FALSE, // VkBool32 blendEnable
439 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
440 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
441 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
442 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
443 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
444 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
445 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
446 | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT};
447
448 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault = {
449 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
450 DE_NULL, // const void* pNext
451 0u, // VkPipelineColorBlendStateCreateFlags flags
452 VK_FALSE, // VkBool32 logicOpEnable
453 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
454 1u, // uint32_t attachmentCount
455 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
456 {0.0f, 0.0f, 0.0f, 0.0f} // float blendConstants[4]
457 };
458
459 std::vector<VkDynamicState> dynamicStates;
460
461 if (viewports.empty())
462 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
463 if (scissors.empty())
464 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
465
466 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault = {
467 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
468 DE_NULL, // const void* pNext
469 0u, // VkPipelineDynamicStateCreateFlags flags
470 (uint32_t)dynamicStates.size(), // uint32_t dynamicStateCount
471 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
472 };
473
474 const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfoDefaultPtr =
475 dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
476
477 const VkGraphicsPipelineCreateInfo pipelineCreateInfo = {
478 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
479 DE_NULL, // const void* pNext
480 0u, // VkPipelineCreateFlags flags
481 (uint32_t)pipelineShaderStageParams.size(), // uint32_t stageCount
482 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
483 vertexInputStateCreateInfo ?
484 vertexInputStateCreateInfo :
485 &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
486 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
487 hasTessellation ? &tessStateCreateInfo :
488 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
489 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
490 rasterizationStateCreateInfo ?
491 rasterizationStateCreateInfo :
492 &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
493 multisampleStateCreateInfo ?
494 multisampleStateCreateInfo :
495 &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
496 depthStencilStateCreateInfo ?
497 depthStencilStateCreateInfo :
498 &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
499 colorBlendStateCreateInfo ?
500 colorBlendStateCreateInfo :
501 &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
502 dynamicStateCreateInfo ?
503 dynamicStateCreateInfo :
504 dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
505 pipelineLayout, // VkPipelineLayout layout
506 renderPass, // VkRenderPass renderPass
507 subpass, // uint32_t subpass
508 DE_NULL, // VkPipeline basePipelineHandle
509 0 // int32_t basePipelineIndex;
510 };
511
512 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
513 }
514
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const uint32_t vertexShaderStageCreateFlags=0u,const uint32_t tessellationControlShaderStageCreateFlags=0u,const uint32_t tessellationEvalShaderStageCreateFlags=0u,const uint32_t geometryShaderStageCreateFlags=0u,const uint32_t fragmentShaderStageCreateFlags=0u,const uint32_t requiredSubgroupSize[5]=DE_NULL)515 Move<VkPipeline> makeGraphicsPipeline(
516 Context &context, const VkPipelineLayout pipelineLayout, const VkShaderStageFlags stages,
517 const VkShaderModule vertexShaderModule, const VkShaderModule fragmentShaderModule,
518 const VkShaderModule geometryShaderModule, const VkShaderModule tessellationControlModule,
519 const VkShaderModule tessellationEvaluationModule, const VkRenderPass renderPass,
520 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
521 const VkVertexInputBindingDescription *vertexInputBindingDescription = DE_NULL,
522 const VkVertexInputAttributeDescription *vertexInputAttributeDescriptions = DE_NULL,
523 const bool frameBufferTests = false, const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
524 const uint32_t vertexShaderStageCreateFlags = 0u, const uint32_t tessellationControlShaderStageCreateFlags = 0u,
525 const uint32_t tessellationEvalShaderStageCreateFlags = 0u, const uint32_t geometryShaderStageCreateFlags = 0u,
526 const uint32_t fragmentShaderStageCreateFlags = 0u, const uint32_t requiredSubgroupSize[5] = DE_NULL)
527 {
528 const std::vector<VkViewport> noViewports;
529 const std::vector<VkRect2D> noScissors;
530 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
531 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
532 DE_NULL, // const void* pNext;
533 0u, // VkPipelineVertexInputStateCreateFlags flags;
534 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // uint32_t vertexBindingDescriptionCount;
535 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
536 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // uint32_t vertexAttributeDescriptionCount;
537 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
538 };
539 const uint32_t numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
540 const VkColorComponentFlags colorComponent =
541 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
542 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
543 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
544 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
545 VK_COLOR_COMPONENT_A_BIT;
546 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
547 VK_FALSE, // VkBool32 blendEnable;
548 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor;
549 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
550 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp;
551 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor;
552 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
553 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp;
554 colorComponent // VkColorComponentFlags colorWriteMask;
555 };
556 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
557 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
558 DE_NULL, // const void* pNext;
559 0u, // VkPipelineColorBlendStateCreateFlags flags;
560 VK_FALSE, // VkBool32 logicOpEnable;
561 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp;
562 1, // uint32_t attachmentCount;
563 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
564 {0.0f, 0.0f, 0.0f, 0.0f} // float blendConstants[4];
565 };
566 const uint32_t patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
567
568 return makeGraphicsPipeline(
569 context.getDeviceInterface(), // const DeviceInterface& vk
570 context.getDevice(), // const VkDevice device
571 pipelineLayout, // const VkPipelineLayout pipelineLayout
572 vertexShaderModule, // const VkShaderModule vertexShaderModule
573 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
574 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
575 geometryShaderModule, // const VkShaderModule geometryShaderModule
576 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
577 renderPass, // const VkRenderPass renderPass
578 noViewports, // const std::vector<VkViewport>& viewports
579 noScissors, // const std::vector<VkRect2D>& scissors
580 topology, // const VkPrimitiveTopology topology
581 0u, // const uint32_t subpass
582 patchControlPoints, // const uint32_t patchControlPoints
583 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
584 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
585 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
586 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
587 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
588 DE_NULL, // const VkPipelineDynamicStateCreateInfo*
589 vertexShaderStageCreateFlags, // const uint32_t vertexShaderStageCreateFlags,
590 tessellationControlShaderStageCreateFlags, // const uint32_t tessellationControlShaderStageCreateFlags
591 tessellationEvalShaderStageCreateFlags, // const uint32_t tessellationEvalShaderStageCreateFlags
592 geometryShaderStageCreateFlags, // const uint32_t geometryShaderStageCreateFlags
593 fragmentShaderStageCreateFlags, // const uint32_t fragmentShaderStageCreateFlags
594 requiredSubgroupSize); // const uint32_t requiredSubgroupSize[5]
595 }
596
makeCommandBuffer(Context & context,const VkCommandPool commandPool)597 Move<VkCommandBuffer> makeCommandBuffer(Context &context, const VkCommandPool commandPool)
598 {
599 const VkCommandBufferAllocateInfo bufferAllocateParams = {
600 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
601 DE_NULL, // const void* pNext;
602 commandPool, // VkCommandPool commandPool;
603 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
604 1u, // uint32_t bufferCount;
605 };
606 return allocateCommandBuffer(context.getDeviceInterface(), context.getDevice(), &bufferAllocateParams);
607 }
608
609 struct Buffer;
610 struct Image;
611
612 struct BufferOrImage
613 {
isImage__anon39bd43f10111::BufferOrImage614 bool isImage() const
615 {
616 return m_isImage;
617 }
618
getAsBuffer__anon39bd43f10111::BufferOrImage619 Buffer *getAsBuffer()
620 {
621 if (m_isImage)
622 DE_FATAL("Trying to get a buffer as an image!");
623 return reinterpret_cast<Buffer *>(this);
624 }
625
getAsImage__anon39bd43f10111::BufferOrImage626 Image *getAsImage()
627 {
628 if (!m_isImage)
629 DE_FATAL("Trying to get an image as a buffer!");
630 return reinterpret_cast<Image *>(this);
631 }
632
getType__anon39bd43f10111::BufferOrImage633 virtual VkDescriptorType getType() const
634 {
635 if (m_isImage)
636 {
637 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
638 }
639 else
640 {
641 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
642 }
643 }
644
getAllocation__anon39bd43f10111::BufferOrImage645 Allocation &getAllocation() const
646 {
647 return *m_allocation;
648 }
649
~BufferOrImage__anon39bd43f10111::BufferOrImage650 virtual ~BufferOrImage()
651 {
652 }
653
654 protected:
BufferOrImage__anon39bd43f10111::BufferOrImage655 explicit BufferOrImage(bool image) : m_isImage(image)
656 {
657 }
658
659 bool m_isImage;
660 de::details::MovePtr<Allocation> m_allocation;
661 };
662
663 struct Buffer : public BufferOrImage
664 {
Buffer__anon39bd43f10111::Buffer665 explicit Buffer(Context &context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
666 : BufferOrImage(false)
667 , m_sizeInBytes(sizeInBytes)
668 , m_usage(usage)
669 {
670 const DeviceInterface &vkd = context.getDeviceInterface();
671 const VkDevice device = context.getDevice();
672
673 const vk::VkBufferCreateInfo bufferCreateInfo = {
674 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
675 DE_NULL,
676 0u,
677 m_sizeInBytes,
678 m_usage,
679 VK_SHARING_MODE_EXCLUSIVE,
680 0u,
681 DE_NULL,
682 };
683 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
684
685 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
686
687 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
688 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
689 }
690
getType__anon39bd43f10111::Buffer691 virtual VkDescriptorType getType() const
692 {
693 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
694 {
695 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
696 }
697 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
698 }
699
getBuffer__anon39bd43f10111::Buffer700 VkBuffer getBuffer() const
701 {
702 return *m_buffer;
703 }
704
getBufferPtr__anon39bd43f10111::Buffer705 const VkBuffer *getBufferPtr() const
706 {
707 return &(*m_buffer);
708 }
709
getSize__anon39bd43f10111::Buffer710 VkDeviceSize getSize() const
711 {
712 return m_sizeInBytes;
713 }
714
715 private:
716 Move<VkBuffer> m_buffer;
717 VkDeviceSize m_sizeInBytes;
718 const VkBufferUsageFlags m_usage;
719 };
720
// Device-local 2D image wrapper (plus matching view and sampler) used as a
// descriptor resource in the subgroup tests. The image is transitioned to
// VK_IMAGE_LAYOUT_GENERAL at construction time so test pipelines can use it
// without any further layout management.
struct Image : public BufferOrImage
{
    // Creates a single-mip, single-layer 2D image of the given size/format,
    // allocates and binds device memory, creates a color view and a
    // nearest-filtering clamp-to-edge sampler, and submits a layout
    // transition UNDEFINED -> GENERAL on the universal queue.
    explicit Image(Context &context, uint32_t width, uint32_t height, VkFormat format,
                   VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
        : BufferOrImage(true)
    {
        const DeviceInterface &vk = context.getDeviceInterface();
        const VkDevice device = context.getDevice();
        const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();

        const VkImageCreateInfo imageCreateInfo = {
            VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                             // const void* pNext;
            0,                                   // VkImageCreateFlags flags;
            VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
            format,                              // VkFormat format;
            {width, height, 1},                  // VkExtent3D extent;
            1,                                   // uint32_t mipLevels;
            1,                                   // uint32_t arrayLayers;
            VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
            VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
            usage,                               // VkImageUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
            0u,                                  // uint32_t queueFamilyIndexCount;
            DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
            VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
        };

        const VkComponentMapping componentMapping = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                                                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};

        // Whole-image color range: one mip level, one array layer.
        const VkImageSubresourceRange subresourceRange = {
            VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
            0u,                        //uint32_t baseMipLevel
            1u,                        //uint32_t levelCount
            0u,                        //uint32_t baseArrayLayer
            1u                         //uint32_t layerCount
        };

        // Simple nearest-filtering sampler with normalized coordinates.
        const VkSamplerCreateInfo samplerCreateInfo = {
            VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,   // VkStructureType sType;
            DE_NULL,                                 // const void* pNext;
            0u,                                      // VkSamplerCreateFlags flags;
            VK_FILTER_NEAREST,                       // VkFilter magFilter;
            VK_FILTER_NEAREST,                       // VkFilter minFilter;
            VK_SAMPLER_MIPMAP_MODE_NEAREST,          // VkSamplerMipmapMode mipmapMode;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeU;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeV;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeW;
            0.0f,                                    // float mipLodBias;
            VK_FALSE,                                // VkBool32 anisotropyEnable;
            1.0f,                                    // float maxAnisotropy;
            false,                                   // VkBool32 compareEnable; (false == VK_FALSE)
            VK_COMPARE_OP_ALWAYS,                    // VkCompareOp compareOp;
            0.0f,                                    // float minLod;
            0.0f,                                    // float maxLod;
            VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
            VK_FALSE,                                // VkBool32 unnormalizedCoordinates;
        };

        m_image = createImage(vk, device, &imageCreateInfo);

        VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);

        // NOTE(review): allocates twice the reported requirement — presumably
        // a workaround (e.g. for driver over-reads or alignment slack);
        // confirm the reason before removing.
        req.size *= 2;
        m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);

        VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));

        const VkImageViewCreateInfo imageViewCreateInfo = {
            VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                  // const void* pNext;
            0,                                        // VkImageViewCreateFlags flags;
            *m_image,                                 // VkImage image;
            VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
            imageCreateInfo.format,                   // VkFormat format;
            componentMapping,                         // VkComponentMapping components;
            subresourceRange                          // VkImageSubresourceRange subresourceRange;
        };

        m_imageView = createImageView(vk, device, &imageViewCreateInfo);
        m_sampler = createSampler(vk, device, &samplerCreateInfo);

        // Transition input image layouts
        {
            // One-shot command buffer on the universal queue: move the image
            // from UNDEFINED to GENERAL so shaders can access it directly.
            const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
            const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));

            beginCommandBuffer(vk, *cmdBuffer);

            const VkImageMemoryBarrier imageBarrier =
                makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                                       VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);

            vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                  (VkDependencyFlags)0, 0u, (const VkMemoryBarrier *)DE_NULL, 0u,
                                  (const VkBufferMemoryBarrier *)DE_NULL, 1u, &imageBarrier);

            endCommandBuffer(vk, *cmdBuffer);
            submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
        }
    }

    VkImage getImage() const
    {
        return *m_image;
    }

    VkImageView getImageView() const
    {
        return *m_imageView;
    }

    VkSampler getSampler() const
    {
        return *m_sampler;
    }

private:
    Move<VkImage> m_image;
    Move<VkImageView> m_imageView;
    Move<VkSampler> m_sampler;
};
844 } // namespace
845
getStagesCount(const VkShaderStageFlags shaderStages)846 uint32_t vkt::subgroups::getStagesCount(const VkShaderStageFlags shaderStages)
847 {
848 const uint32_t stageCount = isAllGraphicsStages(shaderStages) ? 4 :
849 isAllComputeStages(shaderStages) ? 1
850 #ifndef CTS_USES_VULKANSC
851 :
852 isAllRayTracingStages(shaderStages) ? 6 :
853 isAllMeshShadingStages(shaderStages) ? 1
854 #endif // CTS_USES_VULKANSC
855 :
856 0;
857
858 DE_ASSERT(stageCount != 0);
859
860 return stageCount;
861 }
862
// Returns GLSL source emulating subgroupBallot() through shared memory:
// every invocation that votes 'true' atomically sets its bit in a
// per-subgroup uvec4, and the accumulated 128-bit mask is returned.
std::string vkt::subgroups::getSharedMemoryBallotHelper()
{
    return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
           "gl_WorkGroupSize.z];\n"
           "uvec4 sharedMemoryBallot(bool vote)\n"
           "{\n"
           " uint groupOffset = gl_SubgroupID;\n"
           " // One invocation in the group 0's the whole group's data\n"
           " if (subgroupElect())\n"
           " {\n"
           " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
           " }\n"
           " subgroupMemoryBarrierShared();\n"
           " if (vote)\n"
           " {\n"
           " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
           " const highp uint bitToSet = 1u << invocationId;\n"
           " switch (gl_SubgroupInvocationID / 32)\n"
           " {\n"
           " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
           " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
           " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
           " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
           " }\n"
           " }\n"
           " subgroupMemoryBarrierShared();\n"
           " return superSecretComputeShaderHelper[groupOffset];\n"
           "}\n";
}
892
// ARB_shader_ballot flavor of the helper above: same shared-memory ballot
// emulation, but the result is packed into a 64-bit mask (only the low two
// 32-bit words are returned, matching the uint64_t ballot type).
std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
{
    return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
           "gl_WorkGroupSize.z];\n"
           "uint64_t sharedMemoryBallot(bool vote)\n"
           "{\n"
           " uint groupOffset = gl_SubgroupID;\n"
           " // One invocation in the group 0's the whole group's data\n"
           " if (subgroupElect())\n"
           " {\n"
           " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
           " }\n"
           " subgroupMemoryBarrierShared();\n"
           " if (vote)\n"
           " {\n"
           " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
           " const highp uint bitToSet = 1u << invocationId;\n"
           " switch (gl_SubgroupInvocationID / 32)\n"
           " {\n"
           " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
           " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
           " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
           " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
           " }\n"
           " }\n"
           " subgroupMemoryBarrierShared();\n"
           " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
           "}\n";
}
922
getSubgroupSize(Context & context)923 uint32_t vkt::subgroups::getSubgroupSize(Context &context)
924 {
925 return context.getSubgroupProperties().subgroupSize;
926 }
927
maxSupportedSubgroupSize()928 uint32_t vkt::subgroups::maxSupportedSubgroupSize()
929 {
930 return 128u;
931 }
932
getShaderStageName(VkShaderStageFlags stage)933 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
934 {
935 switch (stage)
936 {
937 case VK_SHADER_STAGE_COMPUTE_BIT:
938 return "compute";
939 case VK_SHADER_STAGE_FRAGMENT_BIT:
940 return "fragment";
941 case VK_SHADER_STAGE_VERTEX_BIT:
942 return "vertex";
943 case VK_SHADER_STAGE_GEOMETRY_BIT:
944 return "geometry";
945 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
946 return "tess_control";
947 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
948 return "tess_eval";
949 #ifndef CTS_USES_VULKANSC
950 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
951 return "rgen";
952 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
953 return "ahit";
954 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
955 return "chit";
956 case VK_SHADER_STAGE_MISS_BIT_KHR:
957 return "miss";
958 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
959 return "sect";
960 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
961 return "call";
962 case VK_SHADER_STAGE_MESH_BIT_EXT:
963 return "mesh";
964 case VK_SHADER_STAGE_TASK_BIT_EXT:
965 return "task";
966 #endif // CTS_USES_VULKANSC
967 default:
968 TCU_THROW(InternalError, "Unhandled stage");
969 }
970 }
971
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)972 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
973 {
974 switch (bit)
975 {
976 case VK_SUBGROUP_FEATURE_BASIC_BIT:
977 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
978 case VK_SUBGROUP_FEATURE_VOTE_BIT:
979 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
980 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
981 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
982 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
983 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
984 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
985 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
986 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
987 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
988 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
989 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
990 case VK_SUBGROUP_FEATURE_QUAD_BIT:
991 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
992 default:
993 TCU_THROW(InternalError, "Unknown subgroup feature category");
994 }
995 }
996
// Registers pre-assembled SPIR-V 1.3 pass-through shaders ("vert_noSubgroup",
// "tesc_noSubgroup", "tese_noSubgroup") that contain no subgroup operations.
// They are used to fill pipeline stages that are not under test. The GLSL
// each module was generated from is shown in the comment above it.
void vkt::subgroups::addNoSubgroupShader(SourceCollections &programCollection)
{
    // Vertex shader: places one point per gl_VertexIndex along a 1024-pixel row.
    {
        /*
        "#version 450\n"
        "void main (void)\n"
        "{\n"
        " float pixelSize = 2.0f/1024.0f;\n"
        " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
        " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
        " gl_PointSize = 1.0f;\n"
        "}\n"
        */
        const std::string vertNoSubgroup = "; SPIR-V\n"
                                           "; Version: 1.3\n"
                                           "; Generator: Khronos Glslang Reference Front End; 1\n"
                                           "; Bound: 37\n"
                                           "; Schema: 0\n"
                                           "OpCapability Shader\n"
                                           "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                           "OpMemoryModel Logical GLSL450\n"
                                           "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
                                           "OpMemberDecorate %20 0 BuiltIn Position\n"
                                           "OpMemberDecorate %20 1 BuiltIn PointSize\n"
                                           "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
                                           "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
                                           "OpDecorate %20 Block\n"
                                           "OpDecorate %26 BuiltIn VertexIndex\n"
                                           "%2 = OpTypeVoid\n"
                                           "%3 = OpTypeFunction %2\n"
                                           "%6 = OpTypeFloat 32\n"
                                           "%7 = OpTypePointer Function %6\n"
                                           "%9 = OpConstant %6 0.00195313\n"
                                           "%12 = OpConstant %6 2\n"
                                           "%14 = OpConstant %6 1\n"
                                           "%16 = OpTypeVector %6 4\n"
                                           "%17 = OpTypeInt 32 0\n"
                                           "%18 = OpConstant %17 1\n"
                                           "%19 = OpTypeArray %6 %18\n"
                                           "%20 = OpTypeStruct %16 %6 %19 %19\n"
                                           "%21 = OpTypePointer Output %20\n"
                                           "%22 = OpVariable %21 Output\n"
                                           "%23 = OpTypeInt 32 1\n"
                                           "%24 = OpConstant %23 0\n"
                                           "%25 = OpTypePointer Input %23\n"
                                           "%26 = OpVariable %25 Input\n"
                                           "%33 = OpConstant %6 0\n"
                                           "%35 = OpTypePointer Output %16\n"
                                           "%37 = OpConstant %23 1\n"
                                           "%38 = OpTypePointer Output %6\n"
                                           "%4 = OpFunction %2 None %3\n"
                                           "%5 = OpLabel\n"
                                           "%8 = OpVariable %7 Function\n"
                                           "%10 = OpVariable %7 Function\n"
                                           "OpStore %8 %9\n"
                                           "%11 = OpLoad %6 %8\n"
                                           "%13 = OpFDiv %6 %11 %12\n"
                                           "%15 = OpFSub %6 %13 %14\n"
                                           "OpStore %10 %15\n"
                                           "%27 = OpLoad %23 %26\n"
                                           "%28 = OpConvertSToF %6 %27\n"
                                           "%29 = OpLoad %6 %8\n"
                                           "%30 = OpFMul %6 %28 %29\n"
                                           "%31 = OpLoad %6 %10\n"
                                           "%32 = OpFAdd %6 %30 %31\n"
                                           "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
                                           "%36 = OpAccessChain %35 %22 %24\n"
                                           "OpStore %36 %34\n"
                                           "%39 = OpAccessChain %38 %22 %37\n"
                                           "OpStore %39 %14\n"
                                           "OpReturn\n"
                                           "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
    }

    // Tessellation control shader: single-vertex patches, outer levels fixed to 1.
    {
        /*
        "#version 450\n"
        "layout(vertices=1) out;\n"
        "\n"
        "void main (void)\n"
        "{\n"
        " if (gl_InvocationID == 0)\n"
        " {\n"
        " gl_TessLevelOuter[0] = 1.0f;\n"
        " gl_TessLevelOuter[1] = 1.0f;\n"
        " }\n"
        " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
        "}\n"
        */
        const std::string tescNoSubgroup = "; SPIR-V\n"
                                           "; Version: 1.3\n"
                                           "; Generator: Khronos Glslang Reference Front End; 1\n"
                                           "; Bound: 45\n"
                                           "; Schema: 0\n"
                                           "OpCapability Tessellation\n"
                                           "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                           "OpMemoryModel Logical GLSL450\n"
                                           "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
                                           "OpExecutionMode %4 OutputVertices 1\n"
                                           "OpDecorate %8 BuiltIn InvocationId\n"
                                           "OpDecorate %20 Patch\n"
                                           "OpDecorate %20 BuiltIn TessLevelOuter\n"
                                           "OpMemberDecorate %29 0 BuiltIn Position\n"
                                           "OpMemberDecorate %29 1 BuiltIn PointSize\n"
                                           "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
                                           "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
                                           "OpDecorate %29 Block\n"
                                           "OpMemberDecorate %34 0 BuiltIn Position\n"
                                           "OpMemberDecorate %34 1 BuiltIn PointSize\n"
                                           "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
                                           "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
                                           "OpDecorate %34 Block\n"
                                           "%2 = OpTypeVoid\n"
                                           "%3 = OpTypeFunction %2\n"
                                           "%6 = OpTypeInt 32 1\n"
                                           "%7 = OpTypePointer Input %6\n"
                                           "%8 = OpVariable %7 Input\n"
                                           "%10 = OpConstant %6 0\n"
                                           "%11 = OpTypeBool\n"
                                           "%15 = OpTypeFloat 32\n"
                                           "%16 = OpTypeInt 32 0\n"
                                           "%17 = OpConstant %16 4\n"
                                           "%18 = OpTypeArray %15 %17\n"
                                           "%19 = OpTypePointer Output %18\n"
                                           "%20 = OpVariable %19 Output\n"
                                           "%21 = OpConstant %15 1\n"
                                           "%22 = OpTypePointer Output %15\n"
                                           "%24 = OpConstant %6 1\n"
                                           "%26 = OpTypeVector %15 4\n"
                                           "%27 = OpConstant %16 1\n"
                                           "%28 = OpTypeArray %15 %27\n"
                                           "%29 = OpTypeStruct %26 %15 %28 %28\n"
                                           "%30 = OpTypeArray %29 %27\n"
                                           "%31 = OpTypePointer Output %30\n"
                                           "%32 = OpVariable %31 Output\n"
                                           "%34 = OpTypeStruct %26 %15 %28 %28\n"
                                           "%35 = OpConstant %16 32\n"
                                           "%36 = OpTypeArray %34 %35\n"
                                           "%37 = OpTypePointer Input %36\n"
                                           "%38 = OpVariable %37 Input\n"
                                           "%40 = OpTypePointer Input %26\n"
                                           "%43 = OpTypePointer Output %26\n"
                                           "%4 = OpFunction %2 None %3\n"
                                           "%5 = OpLabel\n"
                                           "%9 = OpLoad %6 %8\n"
                                           "%12 = OpIEqual %11 %9 %10\n"
                                           "OpSelectionMerge %14 None\n"
                                           "OpBranchConditional %12 %13 %14\n"
                                           "%13 = OpLabel\n"
                                           "%23 = OpAccessChain %22 %20 %10\n"
                                           "OpStore %23 %21\n"
                                           "%25 = OpAccessChain %22 %20 %24\n"
                                           "OpStore %25 %21\n"
                                           "OpBranch %14\n"
                                           "%14 = OpLabel\n"
                                           "%33 = OpLoad %6 %8\n"
                                           "%39 = OpLoad %6 %8\n"
                                           "%41 = OpAccessChain %40 %38 %39 %10\n"
                                           "%42 = OpLoad %26 %41\n"
                                           "%44 = OpAccessChain %43 %32 %33 %10\n"
                                           "OpStore %44 %42\n"
                                           "OpReturn\n"
                                           "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
    }

    // Tessellation evaluation shader: isolines, offsets position by tess coord.
    {
        /*
        "#version 450\n"
        "layout(isolines) in;\n"
        "\n"
        "void main (void)\n"
        "{\n"
        " float pixelSize = 2.0f/1024.0f;\n"
        " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
        "}\n";
        */
        const std::string teseNoSubgroup = "; SPIR-V\n"
                                           "; Version: 1.3\n"
                                           "; Generator: Khronos Glslang Reference Front End; 2\n"
                                           "; Bound: 42\n"
                                           "; Schema: 0\n"
                                           "OpCapability Tessellation\n"
                                           "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                           "OpMemoryModel Logical GLSL450\n"
                                           "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
                                           "OpExecutionMode %4 Isolines\n"
                                           "OpExecutionMode %4 SpacingEqual\n"
                                           "OpExecutionMode %4 VertexOrderCcw\n"
                                           "OpMemberDecorate %14 0 BuiltIn Position\n"
                                           "OpMemberDecorate %14 1 BuiltIn PointSize\n"
                                           "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
                                           "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
                                           "OpDecorate %14 Block\n"
                                           "OpMemberDecorate %19 0 BuiltIn Position\n"
                                           "OpMemberDecorate %19 1 BuiltIn PointSize\n"
                                           "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
                                           "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
                                           "OpDecorate %19 Block\n"
                                           "OpDecorate %29 BuiltIn TessCoord\n"
                                           "%2 = OpTypeVoid\n"
                                           "%3 = OpTypeFunction %2\n"
                                           "%6 = OpTypeFloat 32\n"
                                           "%7 = OpTypePointer Function %6\n"
                                           "%9 = OpConstant %6 0.00195313\n"
                                           "%10 = OpTypeVector %6 4\n"
                                           "%11 = OpTypeInt 32 0\n"
                                           "%12 = OpConstant %11 1\n"
                                           "%13 = OpTypeArray %6 %12\n"
                                           "%14 = OpTypeStruct %10 %6 %13 %13\n"
                                           "%15 = OpTypePointer Output %14\n"
                                           "%16 = OpVariable %15 Output\n"
                                           "%17 = OpTypeInt 32 1\n"
                                           "%18 = OpConstant %17 0\n"
                                           "%19 = OpTypeStruct %10 %6 %13 %13\n"
                                           "%20 = OpConstant %11 32\n"
                                           "%21 = OpTypeArray %19 %20\n"
                                           "%22 = OpTypePointer Input %21\n"
                                           "%23 = OpVariable %22 Input\n"
                                           "%24 = OpTypePointer Input %10\n"
                                           "%27 = OpTypeVector %6 3\n"
                                           "%28 = OpTypePointer Input %27\n"
                                           "%29 = OpVariable %28 Input\n"
                                           "%30 = OpConstant %11 0\n"
                                           "%31 = OpTypePointer Input %6\n"
                                           "%36 = OpConstant %6 2\n"
                                           "%40 = OpTypePointer Output %10\n"
                                           "%4 = OpFunction %2 None %3\n"
                                           "%5 = OpLabel\n"
                                           "%8 = OpVariable %7 Function\n"
                                           "OpStore %8 %9\n"
                                           "%25 = OpAccessChain %24 %23 %18 %18\n"
                                           "%26 = OpLoad %10 %25\n"
                                           "%32 = OpAccessChain %31 %29 %30\n"
                                           "%33 = OpLoad %6 %32\n"
                                           "%34 = OpLoad %6 %8\n"
                                           "%35 = OpFMul %6 %33 %34\n"
                                           "%37 = OpFDiv %6 %35 %36\n"
                                           "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
                                           "%39 = OpFAdd %10 %26 %38\n"
                                           "%41 = OpAccessChain %40 %16 %18\n"
                                           "OpStore %41 %39\n"
                                           "OpReturn\n"
                                           "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
    }
}
1245
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const uint32_t stage)1246 static std::string getFramebufferBufferDeclarations(const VkFormat &format,
1247 const std::vector<std::string> &declarations, const uint32_t stage)
1248 {
1249 if (declarations.empty())
1250 {
1251 const std::string name = (stage == 0) ? "result" : "out_color";
1252 const std::string suffix = (stage == 2) ? "[]" : "";
1253 const std::string result = "layout(location = 0) out float " + name + suffix +
1254 ";\n"
1255 "layout(set = 0, binding = 0) uniform Buffer1\n"
1256 "{\n"
1257 " " +
1258 de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" +
1259 de::toString(subgroups::maxSupportedSubgroupSize()) +
1260 "];\n"
1261 "};\n";
1262
1263 return result;
1264 }
1265 else
1266 {
1267 return declarations[stage];
1268 }
1269 }
1270
// Builds the standard framebuffer-based shader set for one tested stage:
// the stage named by 'shaderStage' gets 'testSrc' inlined into its body
// (writing float(tempRes) to its output), and the remaining pipeline stages
// are filled with pass-through helpers. 'extHeader' supplies extension
// directives, 'helperStr' shared helper code, and 'declarations' optional
// per-stage I/O declarations (see getFramebufferBufferDeclarations).
// 'gsPointSize' controls whether gl_PointSize is forwarded.
void vkt::subgroups::initStdFrameBufferPrograms(SourceCollections &programCollection,
                                                const vk::ShaderBuildOptions &buildOptions,
                                                VkShaderStageFlags shaderStage, VkFormat format, bool gsPointSize,
                                                const std::string &extHeader, const std::string &testSrc,
                                                const std::string &helperStr,
                                                const std::vector<std::string> &declarations)
{
    // A pass-through fragment shader is always required.
    subgroups::setFragmentShaderFrameBuffer(programCollection);

    // Unless the vertex stage itself is under test, add the pass-through vertex shader.
    if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
        subgroups::setVertexShaderFrameBuffer(programCollection);

    if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
    {
        // Tested vertex shader: one point per vertex, test result in "result".
        std::ostringstream vertex;

        vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << extHeader << "layout(location = 0) in highp vec4 in_position;\n"
               << getFramebufferBufferDeclarations(format, declarations, 0) << "\n"
               << helperStr << "void main (void)\n"
               << "{\n"
               << " uint tempRes;\n"
               << testSrc << " result = float(tempRes);\n"
               << " gl_Position = in_position;\n"
               << " gl_PointSize = 1.0f;\n"
               << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
    }
    else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
    {
        // Tested geometry shader: points in/out, result in "out_color".
        std::ostringstream geometry;

        geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                 << extHeader << "layout(points) in;\n"
                 << "layout(points, max_vertices = 1) out;\n"
                 << getFramebufferBufferDeclarations(format, declarations, 1) << "\n"
                 << helperStr << "void main (void)\n"
                 << "{\n"
                 << " uint tempRes;\n"
                 << testSrc << " out_color = float(tempRes);\n"
                 << " gl_Position = gl_in[0].gl_Position;\n"
                 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << " EmitVertex();\n"
                 << " EndPrimitive();\n"
                 << "}\n";

        programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
    }
    else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
    {
        // Tested tessellation control shader (2-vertex patch, outer levels 1);
        // the evaluation stage is a pass-through helper.
        std::ostringstream controlSource;

        controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                      << extHeader << "layout(vertices = 2) out;\n"
                      << getFramebufferBufferDeclarations(format, declarations, 2) << "\n"
                      << helperStr << "void main (void)\n"
                      << "{\n"
                      << " if (gl_InvocationID == 0)\n"
                      << " {\n"
                      << " gl_TessLevelOuter[0] = 1.0f;\n"
                      << " gl_TessLevelOuter[1] = 1.0f;\n"
                      << " }\n"
                      << " uint tempRes;\n"
                      << testSrc << " out_color[gl_InvocationID] = float(tempRes);\n"
                      << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
                      << (gsPointSize ?
                              " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
                              "")
                      << "}\n";

        programCollection.glslSources.add("tesc")
            << glu::TessellationControlSource(controlSource.str()) << buildOptions;
        subgroups::setTesEvalShaderFrameBuffer(programCollection);
    }
    else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
    {
        // Tested tessellation evaluation shader (isolines); the control stage
        // is a pass-through helper.
        ostringstream evaluationSource;

        evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                         << extHeader << "layout(isolines, equal_spacing, ccw ) in;\n"
                         << getFramebufferBufferDeclarations(format, declarations, 3) << "\n"
                         << helperStr << "void main (void)\n"
                         << "{\n"
                         << " uint tempRes;\n"
                         << testSrc << " out_color = float(tempRes);\n"
                         << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
                         << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "}\n";

        subgroups::setTesCtrlShaderFrameBuffer(programCollection);
        programCollection.glslSources.add("tese")
            << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
    }
    else
    {
        DE_FATAL("Unsupported shader stage");
    }
}
1368
// Generates the default GLSL buffer declarations for stage index 'stage'
// (0-based within the tested pipeline) when the caller supplied none.
// Each stage's result buffer sits at binding 'stage' and the shared input
// data buffer at binding 'stageCount'; the fragment stage (indexed one past
// the other graphics stages) writes its result to a color output instead.
static std::string getBufferDeclarations(vk::VkShaderStageFlags shaderStage, const std::string &formatName,
                                         const std::vector<std::string> &declarations, const uint32_t stage)
{
    if (declarations.empty())
    {
        const uint32_t stageCount = vkt::subgroups::getStagesCount(shaderStage);
        const uint32_t binding0 = stage;
        const uint32_t binding1 = stageCount;
        // Fragment is special-cased: it is indexed one past the other stages
        // and writes through a color attachment rather than a storage buffer.
        const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
        const string buffer1 = fragment ? "layout(location = 0) out uint result;\n" :
                                          "layout(set = 0, binding = " + de::toString(binding0) +
                                          ", std430) buffer Buffer1\n"
                                          "{\n"
                                          " uint result[];\n"
                                          "};\n";
        // TODO(boza): the data buffer could probably be declared 'readonly'
        // unconditionally instead of only for multi-stage pipelines.
        const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" +
                               (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") +
                               "\n"
                               "{\n"
                               " " +
                               formatName +
                               " data[];\n"
                               "};\n";

        return buffer1 + buffer2;
    }
    else
    {
        return declarations[stage];
    }
}
1401
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1402 void vkt::subgroups::initStdPrograms(vk::SourceCollections &programCollection,
1403 const vk::ShaderBuildOptions &buildOptions, vk::VkShaderStageFlags shaderStage,
1404 vk::VkFormat format, bool gsPointSize, const std::string &extHeader,
1405 const std::string &testSrc, const std::string &helperStr,
1406 const std::vector<std::string> &declarations, const bool avoidHelperInvocations,
1407 const std::string &tempRes)
1408 {
1409 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1410
1411 if (isAllComputeStages(shaderStage))
1412 {
1413 std::ostringstream src;
1414
1415 src << "#version 450\n"
1416 << extHeader
1417 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1418 "local_size_z_id = 2) in;\n"
1419 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1420 << helperStr << "void main (void)\n"
1421 << "{\n"
1422 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1423 << " highp uint offset = globalSize.x * ((globalSize.y * "
1424 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1425 "gl_GlobalInvocationID.x;\n"
1426 << tempRes << testSrc << " result[offset] = tempRes;\n"
1427 << "}\n";
1428
1429 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1430 }
1431 #ifndef CTS_USES_VULKANSC
1432 else if (isAllMeshShadingStages(shaderStage))
1433 {
1434 const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1435 const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1436
1437 if (testMesh)
1438 {
1439 std::ostringstream mesh;
1440
1441 mesh << "#version 450\n"
1442 << "#extension GL_EXT_mesh_shader : enable\n"
1443 << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1444 << "layout (points) out;\n"
1445 << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1446 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1447 << helperStr << "void main (void)\n"
1448 << "{\n"
1449 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1450 << " highp uint offset = globalSize.x * ((globalSize.y * "
1451 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1452 "gl_GlobalInvocationID.x;\n"
1453 << tempRes << testSrc << " result[offset] = tempRes;\n"
1454 << " SetMeshOutputsEXT(0u, 0u);\n"
1455 << "}\n";
1456
1457 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1458 }
1459 else
1460 {
1461 const std::string meshShaderNoSubgroups =
1462 "#version 450\n"
1463 "#extension GL_EXT_mesh_shader : enable\n"
1464 "\n"
1465 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1466 "layout (points) out;\n"
1467 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1468 "\n"
1469 "void main (void)\n"
1470 "{\n"
1471 " SetMeshOutputsEXT(0u, 0u);\n"
1472 "}\n";
1473 programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1474 }
1475
1476 if (testTask)
1477 {
1478 const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1479 std::ostringstream task;
1480
1481 task << "#version 450\n"
1482 << "#extension GL_EXT_mesh_shader : enable\n"
1483 //<< "#extension GL_NV_mesh_shader : enable\n"
1484 << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1485 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1486 << helperStr << "void main (void)\n"
1487 << "{\n"
1488 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1489 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1490 << " highp uint offset = globalSize.x * ((globalSize.y * "
1491 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1492 "gl_GlobalInvocationID.x;\n"
1493 << tempRes << testSrc << " result[offset] = tempRes;\n"
1494 << " EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z()
1495 << ");\n"
1496 //<< " gl_TaskCountNV = " << emitSize.x() << ";\n"
1497 << "}\n";
1498
1499 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1500 }
1501 }
1502 #endif // CTS_USES_VULKANSC
1503 else if (isAllGraphicsStages(shaderStage))
1504 {
1505 const string vertex =
1506 "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" +
1507 helperStr +
1508 "void main (void)\n"
1509 "{\n"
1510 " uint tempRes;\n" +
1511 testSrc +
1512 " result[gl_VertexIndex] = tempRes;\n"
1513 " float pixelSize = 2.0f/1024.0f;\n"
1514 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1515 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1516 " gl_PointSize = 1.0f;\n"
1517 "}\n";
1518
1519 const string tesc =
1520 "#version 450\n" + extHeader + "layout(vertices=1) out;\n" +
1521 getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" + helperStr +
1522 "void main (void)\n"
1523 "{\n" +
1524 tempRes + testSrc +
1525 " result[gl_PrimitiveID] = tempRes;\n"
1526 " if (gl_InvocationID == 0)\n"
1527 " {\n"
1528 " gl_TessLevelOuter[0] = 1.0f;\n"
1529 " gl_TessLevelOuter[1] = 1.0f;\n"
1530 " }\n"
1531 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" +
1532 (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1533 "}\n";
1534
1535 const string tese = "#version 450\n" + extHeader + "layout(isolines) in;\n" +
1536 getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" + helperStr +
1537 "void main (void)\n"
1538 "{\n" +
1539 tempRes + testSrc +
1540 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1541 " float pixelSize = 2.0f/1024.0f;\n"
1542 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" +
1543 (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") + "}\n";
1544
1545 const string geometry = "#version 450\n" + extHeader +
1546 "layout(${TOPOLOGY}) in;\n"
1547 "layout(points, max_vertices = 1) out;\n" +
1548 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" + helperStr +
1549 "void main (void)\n"
1550 "{\n" +
1551 tempRes + testSrc +
1552 " result[gl_PrimitiveIDIn] = tempRes;\n"
1553 " gl_Position = gl_in[0].gl_Position;\n" +
1554 (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1555 " EmitVertex();\n"
1556 " EndPrimitive();\n"
1557 "}\n";
1558
1559 const string fragment =
1560 "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 4) + helperStr +
1561 "void main (void)\n"
1562 "{\n" +
1563 (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "") + tempRes + testSrc +
1564 " result = tempRes;\n"
1565 "}\n";
1566
1567 subgroups::addNoSubgroupShader(programCollection);
1568
1569 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1570 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1571 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1572 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1573 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment) << buildOptions;
1574 }
1575 #ifndef CTS_USES_VULKANSC
1576 else if (isAllRayTracingStages(shaderStage))
1577 {
1578 const std::string rgenShader =
1579 "#version 460 core\n"
1580 "#extension GL_EXT_ray_tracing: require\n" +
1581 extHeader +
1582 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1583 "layout(location = 0) callableDataEXT uvec4 callData;"
1584 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n" +
1585 getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" + helperStr +
1586 "void main()\n"
1587 "{\n" +
1588 tempRes + testSrc +
1589 " uint rayFlags = 0;\n"
1590 " uint cullMask = 0xFF;\n"
1591 " float tmin = 0.0;\n"
1592 " float tmax = 9.0;\n"
1593 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
1594 "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1595 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1596 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1597 "\n"
1598 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1599 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1600 " executeCallableEXT(0, 0);"
1601 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1602 "}\n";
1603 const std::string ahitShader = "#version 460 core\n"
1604 "#extension GL_EXT_ray_tracing: require\n" +
1605 extHeader +
1606 "hitAttributeEXT vec3 attribs;\n"
1607 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1608 getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" +
1609 helperStr +
1610 "void main()\n"
1611 "{\n" +
1612 tempRes + testSrc +
1613 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1614 "}\n";
1615 const std::string chitShader = "#version 460 core\n"
1616 "#extension GL_EXT_ray_tracing: require\n" +
1617 extHeader +
1618 "hitAttributeEXT vec3 attribs;\n"
1619 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1620 getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" +
1621 helperStr +
1622 "void main()\n"
1623 "{\n" +
1624 tempRes + testSrc +
1625 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1626 "}\n";
1627 const std::string missShader = "#version 460 core\n"
1628 "#extension GL_EXT_ray_tracing: require\n" +
1629 extHeader + "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1630 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" +
1631 helperStr +
1632 "void main()\n"
1633 "{\n" +
1634 tempRes + testSrc +
1635 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1636 "}\n";
1637 const std::string sectShader = "#version 460 core\n"
1638 "#extension GL_EXT_ray_tracing: require\n" +
1639 extHeader + "hitAttributeEXT vec3 hitAttribute;\n" +
1640 getBufferDeclarations(shaderStage, formatName, declarations, 4) + "\n" +
1641 helperStr +
1642 "void main()\n"
1643 "{\n" +
1644 tempRes + testSrc +
1645 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
1646 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1647 "}\n";
1648 const std::string callShader = "#version 460 core\n"
1649 "#extension GL_EXT_ray_tracing: require\n" +
1650 extHeader + "layout(location = 0) callableDataInEXT float callData;\n" +
1651 getBufferDeclarations(shaderStage, formatName, declarations, 5) + "\n" +
1652 helperStr +
1653 "void main()\n"
1654 "{\n" +
1655 tempRes + testSrc +
1656 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1657 "}\n";
1658
1659 programCollection.glslSources.add("rgen") << glu::RaygenSource(rgenShader) << buildOptions;
1660 programCollection.glslSources.add("ahit") << glu::AnyHitSource(ahitShader) << buildOptions;
1661 programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitShader) << buildOptions;
1662 programCollection.glslSources.add("miss") << glu::MissSource(missShader) << buildOptions;
1663 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1664 programCollection.glslSources.add("call") << glu::CallableSource(callShader) << buildOptions;
1665
1666 subgroups::addRayTracingNoSubgroupShader(programCollection);
1667 }
1668 #endif // CTS_USES_VULKANSC
1669 else
1670 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1671 }
1672
isSubgroupSupported(Context & context)1673 bool vkt::subgroups::isSubgroupSupported(Context &context)
1674 {
1675 return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1676 }
1677
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1678 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(Context &context, const VkShaderStageFlags stage)
1679 {
1680 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1681 }
1682
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1683 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(Context &context, VkSubgroupFeatureFlagBits bit)
1684 {
1685 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1686 }
1687
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1688 bool vkt::subgroups::areQuadOperationsSupportedForStages(Context &context, const VkShaderStageFlags stages)
1689 {
1690 // Check general quad feature support first.
1691 if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1692 return false;
1693
1694 if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1695 return true; // No problem, any stage works.
1696
1697 // Only frag and compute are supported.
1698 const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1699 const VkShaderStageFlags otherStages = ~fragCompute;
1700 return ((stages & otherStages) == 0u);
1701 }
1702
isFragmentSSBOSupportedForDevice(Context & context)1703 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context &context)
1704 {
1705 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1706 }
1707
isVertexSSBOSupportedForDevice(Context & context)1708 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context &context)
1709 {
1710 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1711 }
1712
isInt64SupportedForDevice(Context & context)1713 bool vkt::subgroups::isInt64SupportedForDevice(Context &context)
1714 {
1715 return context.getDeviceFeatures().shaderInt64 ? true : false;
1716 }
1717
isTessellationAndGeometryPointSizeSupported(Context & context)1718 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported(Context &context)
1719 {
1720 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1721 }
1722
is16BitUBOStorageSupported(Context & context)1723 bool vkt::subgroups::is16BitUBOStorageSupported(Context &context)
1724 {
1725 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1726 }
1727
is8BitUBOStorageSupported(Context & context)1728 bool vkt::subgroups::is8BitUBOStorageSupported(Context &context)
1729 {
1730 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1731 }
1732
// Tells whether 'format' can be used as a shader data type by the subgroup
// tests on this device: the matching arithmetic feature (shaderFloat16/64,
// shaderInt8/16/64, shaderSubgroupExtendedTypes) and, for 8/16-bit types, the
// matching storage-buffer access feature must all be present. 32-bit formats
// (the "default" case) are always supported.
bool vkt::subgroups::isFormatSupportedForDevice(Context &context, vk::VkFormat format)
{
    const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures &subgroupExtendedTypesFeatures =
        context.getShaderSubgroupExtendedTypesFeatures();
    const VkPhysicalDeviceShaderFloat16Int8Features &float16Int8Features = context.getShaderFloat16Int8Features();
    const VkPhysicalDevice16BitStorageFeatures &storage16bit = context.get16BitStorageFeatures();
    const VkPhysicalDevice8BitStorageFeatures &storage8bit = context.get8BitStorageFeatures();
    const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
    bool shaderFloat64 = features.shaderFloat64 ? true : false;
    bool shaderInt16 = features.shaderInt16 ? true : false;
    bool shaderInt64 = features.shaderInt64 ? true : false;
    bool shaderSubgroupExtendedTypes = false;
    bool shaderFloat16 = false;
    bool shaderInt8 = false;
    bool storageBuffer16BitAccess = false;
    bool storageBuffer8BitAccess = false;

    // The extended-type feature structs are only read when both extensions are
    // advertised; otherwise the flags stay false and all non-32/64-bit formats
    // below are rejected.
    if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
        context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
    {
        shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
        shaderFloat16 = float16Int8Features.shaderFloat16 ? true : false;
        shaderInt8 = float16Int8Features.shaderInt8 ? true : false;

        if (context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage"))
            storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;

        if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
            storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
    }

    switch (format)
    {
    // 32-bit types (and the USCALED bool stand-ins) need no extra features.
    default:
        return true;
    case VK_FORMAT_R16_SFLOAT:
    case VK_FORMAT_R16G16_SFLOAT:
    case VK_FORMAT_R16G16B16_SFLOAT:
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
    case VK_FORMAT_R64_SFLOAT:
    case VK_FORMAT_R64G64_SFLOAT:
    case VK_FORMAT_R64G64B64_SFLOAT:
    case VK_FORMAT_R64G64B64A64_SFLOAT:
        return shaderFloat64;
    case VK_FORMAT_R8_SINT:
    case VK_FORMAT_R8G8_SINT:
    case VK_FORMAT_R8G8B8_SINT:
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_R8_UINT:
    case VK_FORMAT_R8G8_UINT:
    case VK_FORMAT_R8G8B8_UINT:
    case VK_FORMAT_R8G8B8A8_UINT:
        return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
    case VK_FORMAT_R16_SINT:
    case VK_FORMAT_R16G16_SINT:
    case VK_FORMAT_R16G16B16_SINT:
    case VK_FORMAT_R16G16B16A16_SINT:
    case VK_FORMAT_R16_UINT:
    case VK_FORMAT_R16G16_UINT:
    case VK_FORMAT_R16G16B16_UINT:
    case VK_FORMAT_R16G16B16A16_UINT:
        return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
    case VK_FORMAT_R64_SINT:
    case VK_FORMAT_R64G64_SINT:
    case VK_FORMAT_R64G64B64_SINT:
    case VK_FORMAT_R64G64B64A64_SINT:
    case VK_FORMAT_R64_UINT:
    case VK_FORMAT_R64G64_UINT:
    case VK_FORMAT_R64G64B64_UINT:
    case VK_FORMAT_R64G64B64A64_UINT:
        return shaderSubgroupExtendedTypes && shaderInt64;
    }
}
1807
isSubgroupBroadcastDynamicIdSupported(Context & context)1808 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported(Context &context)
1809 {
1810 return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1811 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice())
1812 .subgroupBroadcastDynamicId;
1813 }
1814
isSubgroupRotateSpecVersionValid(Context & context)1815 bool vkt::subgroups::isSubgroupRotateSpecVersionValid(Context &context)
1816 {
1817 // Ensure "VK_KHR_shader_subgroup_rotate" extension's spec version is at least 2
1818 {
1819 const std::string extensionName = "VK_KHR_shader_subgroup_rotate";
1820 const std::vector<VkExtensionProperties> deviceExtensionProperties =
1821 enumerateDeviceExtensionProperties(context.getInstanceInterface(), context.getPhysicalDevice(), DE_NULL);
1822
1823 for (const auto &property : deviceExtensionProperties)
1824 {
1825 if (property.extensionName == extensionName && property.specVersion < 2)
1826 {
1827 return false;
1828 }
1829 }
1830 }
1831 return true;
1832 }
1833
// Maps a VkFormat used by the subgroup tests to the name of the matching GLSL
// data type. USCALED formats are repurposed to represent booleans/bvecN.
// Throws InternalError for any format outside the test suite's set.
std::string vkt::subgroups::getFormatNameForGLSL(VkFormat format)
{
    switch (format)
    {
    case VK_FORMAT_R8_SINT:
        return "int8_t";
    case VK_FORMAT_R8G8_SINT:
        return "i8vec2";
    case VK_FORMAT_R8G8B8_SINT:
        return "i8vec3";
    case VK_FORMAT_R8G8B8A8_SINT:
        return "i8vec4";
    case VK_FORMAT_R8_UINT:
        return "uint8_t";
    case VK_FORMAT_R8G8_UINT:
        return "u8vec2";
    case VK_FORMAT_R8G8B8_UINT:
        return "u8vec3";
    case VK_FORMAT_R8G8B8A8_UINT:
        return "u8vec4";
    case VK_FORMAT_R16_SINT:
        return "int16_t";
    case VK_FORMAT_R16G16_SINT:
        return "i16vec2";
    case VK_FORMAT_R16G16B16_SINT:
        return "i16vec3";
    case VK_FORMAT_R16G16B16A16_SINT:
        return "i16vec4";
    case VK_FORMAT_R16_UINT:
        return "uint16_t";
    case VK_FORMAT_R16G16_UINT:
        return "u16vec2";
    case VK_FORMAT_R16G16B16_UINT:
        return "u16vec3";
    case VK_FORMAT_R16G16B16A16_UINT:
        return "u16vec4";
    case VK_FORMAT_R32_SINT:
        return "int";
    case VK_FORMAT_R32G32_SINT:
        return "ivec2";
    case VK_FORMAT_R32G32B32_SINT:
        return "ivec3";
    case VK_FORMAT_R32G32B32A32_SINT:
        return "ivec4";
    case VK_FORMAT_R32_UINT:
        return "uint";
    case VK_FORMAT_R32G32_UINT:
        return "uvec2";
    case VK_FORMAT_R32G32B32_UINT:
        return "uvec3";
    case VK_FORMAT_R32G32B32A32_UINT:
        return "uvec4";
    case VK_FORMAT_R64_SINT:
        return "int64_t";
    case VK_FORMAT_R64G64_SINT:
        return "i64vec2";
    case VK_FORMAT_R64G64B64_SINT:
        return "i64vec3";
    case VK_FORMAT_R64G64B64A64_SINT:
        return "i64vec4";
    case VK_FORMAT_R64_UINT:
        return "uint64_t";
    case VK_FORMAT_R64G64_UINT:
        return "u64vec2";
    case VK_FORMAT_R64G64B64_UINT:
        return "u64vec3";
    case VK_FORMAT_R64G64B64A64_UINT:
        return "u64vec4";
    case VK_FORMAT_R16_SFLOAT:
        return "float16_t";
    case VK_FORMAT_R16G16_SFLOAT:
        return "f16vec2";
    case VK_FORMAT_R16G16B16_SFLOAT:
        return "f16vec3";
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        return "f16vec4";
    case VK_FORMAT_R32_SFLOAT:
        return "float";
    case VK_FORMAT_R32G32_SFLOAT:
        return "vec2";
    case VK_FORMAT_R32G32B32_SFLOAT:
        return "vec3";
    case VK_FORMAT_R32G32B32A32_SFLOAT:
        return "vec4";
    case VK_FORMAT_R64_SFLOAT:
        return "double";
    case VK_FORMAT_R64G64_SFLOAT:
        return "dvec2";
    case VK_FORMAT_R64G64B64_SFLOAT:
        return "dvec3";
    case VK_FORMAT_R64G64B64A64_SFLOAT:
        return "dvec4";
    // USCALED formats stand in for boolean types in these tests.
    case VK_FORMAT_R8_USCALED:
        return "bool";
    case VK_FORMAT_R8G8_USCALED:
        return "bvec2";
    case VK_FORMAT_R8G8B8_USCALED:
        return "bvec3";
    case VK_FORMAT_R8G8B8A8_USCALED:
        return "bvec4";
    default:
        TCU_THROW(InternalError, "Unhandled format");
    }
}
1938
// Returns the GLSL #extension line needed to use 'format' as a subgroup
// extended type (int8/int16/int64/float16), or an empty string for formats
// (32-bit, 64-bit float, USCALED) that need no extra extension.
std::string vkt::subgroups::getAdditionalExtensionForFormat(vk::VkFormat format)
{
    switch (format)
    {
    default:
        return "";
    case VK_FORMAT_R8_SINT:
    case VK_FORMAT_R8G8_SINT:
    case VK_FORMAT_R8G8B8_SINT:
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_R8_UINT:
    case VK_FORMAT_R8G8_UINT:
    case VK_FORMAT_R8G8B8_UINT:
    case VK_FORMAT_R8G8B8A8_UINT:
        return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
    case VK_FORMAT_R16_SINT:
    case VK_FORMAT_R16G16_SINT:
    case VK_FORMAT_R16G16B16_SINT:
    case VK_FORMAT_R16G16B16A16_SINT:
    case VK_FORMAT_R16_UINT:
    case VK_FORMAT_R16G16_UINT:
    case VK_FORMAT_R16G16B16_UINT:
    case VK_FORMAT_R16G16B16A16_UINT:
        return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
    case VK_FORMAT_R64_SINT:
    case VK_FORMAT_R64G64_SINT:
    case VK_FORMAT_R64G64B64_SINT:
    case VK_FORMAT_R64G64B64A64_SINT:
    case VK_FORMAT_R64_UINT:
    case VK_FORMAT_R64G64_UINT:
    case VK_FORMAT_R64G64B64_UINT:
    case VK_FORMAT_R64G64B64A64_UINT:
        return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
    case VK_FORMAT_R16_SFLOAT:
    case VK_FORMAT_R16G16_SFLOAT:
    case VK_FORMAT_R16G16B16_SFLOAT:
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
    }
}
1979
getAllFormats()1980 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1981 {
1982 std::vector<VkFormat> formats;
1983
1984 formats.push_back(VK_FORMAT_R8_SINT);
1985 formats.push_back(VK_FORMAT_R8G8_SINT);
1986 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1987 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1988 formats.push_back(VK_FORMAT_R8_UINT);
1989 formats.push_back(VK_FORMAT_R8G8_UINT);
1990 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1991 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1992 formats.push_back(VK_FORMAT_R16_SINT);
1993 formats.push_back(VK_FORMAT_R16G16_SINT);
1994 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1995 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1996 formats.push_back(VK_FORMAT_R16_UINT);
1997 formats.push_back(VK_FORMAT_R16G16_UINT);
1998 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1999 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2000 formats.push_back(VK_FORMAT_R32_SINT);
2001 formats.push_back(VK_FORMAT_R32G32_SINT);
2002 formats.push_back(VK_FORMAT_R32G32B32_SINT);
2003 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2004 formats.push_back(VK_FORMAT_R32_UINT);
2005 formats.push_back(VK_FORMAT_R32G32_UINT);
2006 formats.push_back(VK_FORMAT_R32G32B32_UINT);
2007 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2008 formats.push_back(VK_FORMAT_R64_SINT);
2009 formats.push_back(VK_FORMAT_R64G64_SINT);
2010 formats.push_back(VK_FORMAT_R64G64B64_SINT);
2011 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2012 formats.push_back(VK_FORMAT_R64_UINT);
2013 formats.push_back(VK_FORMAT_R64G64_UINT);
2014 formats.push_back(VK_FORMAT_R64G64B64_UINT);
2015 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2016 formats.push_back(VK_FORMAT_R16_SFLOAT);
2017 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2018 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2019 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2020 formats.push_back(VK_FORMAT_R32_SFLOAT);
2021 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2022 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2023 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2024 formats.push_back(VK_FORMAT_R64_SFLOAT);
2025 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2026 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2027 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2028 formats.push_back(VK_FORMAT_R8_USCALED);
2029 formats.push_back(VK_FORMAT_R8G8_USCALED);
2030 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2031 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2032
2033 return formats;
2034 }
2035
isFormatSigned(VkFormat format)2036 bool vkt::subgroups::isFormatSigned(VkFormat format)
2037 {
2038 switch (format)
2039 {
2040 default:
2041 return false;
2042 case VK_FORMAT_R8_SINT:
2043 case VK_FORMAT_R8G8_SINT:
2044 case VK_FORMAT_R8G8B8_SINT:
2045 case VK_FORMAT_R8G8B8A8_SINT:
2046 case VK_FORMAT_R16_SINT:
2047 case VK_FORMAT_R16G16_SINT:
2048 case VK_FORMAT_R16G16B16_SINT:
2049 case VK_FORMAT_R16G16B16A16_SINT:
2050 case VK_FORMAT_R32_SINT:
2051 case VK_FORMAT_R32G32_SINT:
2052 case VK_FORMAT_R32G32B32_SINT:
2053 case VK_FORMAT_R32G32B32A32_SINT:
2054 case VK_FORMAT_R64_SINT:
2055 case VK_FORMAT_R64G64_SINT:
2056 case VK_FORMAT_R64G64B64_SINT:
2057 case VK_FORMAT_R64G64B64A64_SINT:
2058 return true;
2059 }
2060 }
2061
isFormatUnsigned(VkFormat format)2062 bool vkt::subgroups::isFormatUnsigned(VkFormat format)
2063 {
2064 switch (format)
2065 {
2066 default:
2067 return false;
2068 case VK_FORMAT_R8_UINT:
2069 case VK_FORMAT_R8G8_UINT:
2070 case VK_FORMAT_R8G8B8_UINT:
2071 case VK_FORMAT_R8G8B8A8_UINT:
2072 case VK_FORMAT_R16_UINT:
2073 case VK_FORMAT_R16G16_UINT:
2074 case VK_FORMAT_R16G16B16_UINT:
2075 case VK_FORMAT_R16G16B16A16_UINT:
2076 case VK_FORMAT_R32_UINT:
2077 case VK_FORMAT_R32G32_UINT:
2078 case VK_FORMAT_R32G32B32_UINT:
2079 case VK_FORMAT_R32G32B32A32_UINT:
2080 case VK_FORMAT_R64_UINT:
2081 case VK_FORMAT_R64G64_UINT:
2082 case VK_FORMAT_R64G64B64_UINT:
2083 case VK_FORMAT_R64G64B64A64_UINT:
2084 return true;
2085 }
2086 }
2087
isFormatFloat(VkFormat format)2088 bool vkt::subgroups::isFormatFloat(VkFormat format)
2089 {
2090 switch (format)
2091 {
2092 default:
2093 return false;
2094 case VK_FORMAT_R16_SFLOAT:
2095 case VK_FORMAT_R16G16_SFLOAT:
2096 case VK_FORMAT_R16G16B16_SFLOAT:
2097 case VK_FORMAT_R16G16B16A16_SFLOAT:
2098 case VK_FORMAT_R32_SFLOAT:
2099 case VK_FORMAT_R32G32_SFLOAT:
2100 case VK_FORMAT_R32G32B32_SFLOAT:
2101 case VK_FORMAT_R32G32B32A32_SFLOAT:
2102 case VK_FORMAT_R64_SFLOAT:
2103 case VK_FORMAT_R64G64_SFLOAT:
2104 case VK_FORMAT_R64G64B64_SFLOAT:
2105 case VK_FORMAT_R64G64B64A64_SFLOAT:
2106 return true;
2107 }
2108 }
2109
isFormatBool(VkFormat format)2110 bool vkt::subgroups::isFormatBool(VkFormat format)
2111 {
2112 switch (format)
2113 {
2114 default:
2115 return false;
2116 case VK_FORMAT_R8_USCALED:
2117 case VK_FORMAT_R8G8_USCALED:
2118 case VK_FORMAT_R8G8B8_USCALED:
2119 case VK_FORMAT_R8G8B8A8_USCALED:
2120 return true;
2121 }
2122 }
2123
isFormat8bitTy(VkFormat format)2124 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2125 {
2126 switch (format)
2127 {
2128 default:
2129 return false;
2130 case VK_FORMAT_R8_SINT:
2131 case VK_FORMAT_R8G8_SINT:
2132 case VK_FORMAT_R8G8B8_SINT:
2133 case VK_FORMAT_R8G8B8A8_SINT:
2134 case VK_FORMAT_R8_UINT:
2135 case VK_FORMAT_R8G8_UINT:
2136 case VK_FORMAT_R8G8B8_UINT:
2137 case VK_FORMAT_R8G8B8A8_UINT:
2138 return true;
2139 }
2140 }
2141
isFormat16BitTy(VkFormat format)2142 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2143 {
2144 switch (format)
2145 {
2146 default:
2147 return false;
2148 case VK_FORMAT_R16_SFLOAT:
2149 case VK_FORMAT_R16G16_SFLOAT:
2150 case VK_FORMAT_R16G16B16_SFLOAT:
2151 case VK_FORMAT_R16G16B16A16_SFLOAT:
2152 case VK_FORMAT_R16_SINT:
2153 case VK_FORMAT_R16G16_SINT:
2154 case VK_FORMAT_R16G16B16_SINT:
2155 case VK_FORMAT_R16G16B16A16_SINT:
2156 case VK_FORMAT_R16_UINT:
2157 case VK_FORMAT_R16G16_UINT:
2158 case VK_FORMAT_R16G16B16_UINT:
2159 case VK_FORMAT_R16G16B16A16_UINT:
2160 return true;
2161 }
2162 }
2163
// Adds the pass-through vertex shader used by the framebuffer-based subgroup
// tests. The shader is supplied as hand-written SPIR-V assembly (GLSL
// equivalent in the comment below) so it can be compiled without glslang.
void vkt::subgroups::setVertexShaderFrameBuffer(SourceCollections &programCollection)
{
    /*
        "layout(location = 0) in highp vec4 in_position;\n"
        "void main (void)\n"
        "{\n"
        "   gl_Position = in_position;\n"
        "   gl_PointSize = 1.0f;\n"
        "}\n";
    */
    // NOTE: the assembly below must stay in sync with the GLSL above; it simply
    // copies in_position to gl_Position and writes gl_PointSize = 1.0.
    programCollection.spirvAsmSources.add("vert") << "; SPIR-V\n"
                                                     "; Version: 1.3\n"
                                                     "; Generator: Khronos Glslang Reference Front End; 7\n"
                                                     "; Bound: 25\n"
                                                     "; Schema: 0\n"
                                                     "OpCapability Shader\n"
                                                     "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                     "OpMemoryModel Logical GLSL450\n"
                                                     "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
                                                     "OpMemberDecorate %11 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %11 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %11 Block\n"
                                                     "OpDecorate %17 Location 0\n"
                                                     "%2 = OpTypeVoid\n"
                                                     "%3 = OpTypeFunction %2\n"
                                                     "%6 = OpTypeFloat 32\n"
                                                     "%7 = OpTypeVector %6 4\n"
                                                     "%8 = OpTypeInt 32 0\n"
                                                     "%9 = OpConstant %8 1\n"
                                                     "%10 = OpTypeArray %6 %9\n"
                                                     "%11 = OpTypeStruct %7 %6 %10 %10\n"
                                                     "%12 = OpTypePointer Output %11\n"
                                                     "%13 = OpVariable %12 Output\n"
                                                     "%14 = OpTypeInt 32 1\n"
                                                     "%15 = OpConstant %14 0\n"
                                                     "%16 = OpTypePointer Input %7\n"
                                                     "%17 = OpVariable %16 Input\n"
                                                     "%19 = OpTypePointer Output %7\n"
                                                     "%21 = OpConstant %14 1\n"
                                                     "%22 = OpConstant %6 1\n"
                                                     "%23 = OpTypePointer Output %6\n"
                                                     "%4 = OpFunction %2 None %3\n"
                                                     "%5 = OpLabel\n"
                                                     "%18 = OpLoad %7 %17\n"
                                                     "%20 = OpAccessChain %19 %13 %15\n"
                                                     "OpStore %20 %18\n"
                                                     "%24 = OpAccessChain %23 %13 %21\n"
                                                     "OpStore %24 %22\n"
                                                     "OpReturn\n"
                                                     "OpFunctionEnd\n";
}
2217
// Adds the fragment shader used by the framebuffer-based subgroup tests. It
// converts the interpolated float input to a uint color output (GLSL
// equivalent below); supplied as SPIR-V assembly to avoid a glslang build.
void vkt::subgroups::setFragmentShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    /*
        "layout(location = 0) in float in_color;\n"
        "layout(location = 0) out uint out_color;\n"
        "void main()\n"
        {\n"
        "	out_color = uint(in_color);\n"
        "}\n";
    */
    // NOTE: the assembly below must stay in sync with the GLSL above; it is a
    // single OpConvertFToU from in_color to out_color.
    programCollection.spirvAsmSources.add("fragment") << "; SPIR-V\n"
                                                         "; Version: 1.3\n"
                                                         "; Generator: Khronos Glslang Reference Front End; 2\n"
                                                         "; Bound: 14\n"
                                                         "; Schema: 0\n"
                                                         "OpCapability Shader\n"
                                                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                         "OpMemoryModel Logical GLSL450\n"
                                                         "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
                                                         "OpExecutionMode %4 OriginUpperLeft\n"
                                                         "OpDecorate %8 Location 0\n"
                                                         "OpDecorate %11 Location 0\n"
                                                         "%2 = OpTypeVoid\n"
                                                         "%3 = OpTypeFunction %2\n"
                                                         "%6 = OpTypeInt 32 0\n"
                                                         "%7 = OpTypePointer Output %6\n"
                                                         "%8 = OpVariable %7 Output\n"
                                                         "%9 = OpTypeFloat 32\n"
                                                         "%10 = OpTypePointer Input %9\n"
                                                         "%11 = OpVariable %10 Input\n"
                                                         "%4 = OpFunction %2 None %3\n"
                                                         "%5 = OpLabel\n"
                                                         "%12 = OpLoad %9 %11\n"
                                                         "%13 = OpConvertFToU %6 %12\n"
                                                         "OpStore %8 %13\n"
                                                         "OpReturn\n"
                                                         "OpFunctionEnd\n";
}
2256
// Adds the tessellation control shader used by the framebuffer-based subgroup
// tests: emits 2 control points, sets both outer tess levels to 1.0 from
// invocation 0, and passes positions through (GLSL equivalent below). Supplied
// as SPIR-V assembly to avoid a glslang build.
void vkt::subgroups::setTesCtrlShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    /*
        "#extension GL_KHR_shader_subgroup_basic: enable\n"
        "#extension GL_EXT_tessellation_shader : require\n"
        "layout(vertices = 2) out;\n"
        "void main (void)\n"
        "{\n"
        "  if (gl_InvocationID == 0)\n"
        "  {\n"
        "    gl_TessLevelOuter[0] = 1.0f;\n"
        "    gl_TessLevelOuter[1] = 1.0f;\n"
        "  }\n"
        "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
        "}\n";
    */
    // NOTE: the assembly below must stay in sync with the GLSL above.
    programCollection.spirvAsmSources.add("tesc") << "; SPIR-V\n"
                                                     "; Version: 1.3\n"
                                                     "; Generator: Khronos Glslang Reference Front End; 2\n"
                                                     "; Bound: 46\n"
                                                     "; Schema: 0\n"
                                                     "OpCapability Tessellation\n"
                                                     "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                     "OpMemoryModel Logical GLSL450\n"
                                                     "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
                                                     "OpExecutionMode %4 OutputVertices 2\n"
                                                     "OpDecorate %8 BuiltIn InvocationId\n"
                                                     "OpDecorate %20 Patch\n"
                                                     "OpDecorate %20 BuiltIn TessLevelOuter\n"
                                                     "OpMemberDecorate %29 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %29 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %29 Block\n"
                                                     "OpMemberDecorate %35 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %35 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %35 Block\n"
                                                     "%2 = OpTypeVoid\n"
                                                     "%3 = OpTypeFunction %2\n"
                                                     "%6 = OpTypeInt 32 1\n"
                                                     "%7 = OpTypePointer Input %6\n"
                                                     "%8 = OpVariable %7 Input\n"
                                                     "%10 = OpConstant %6 0\n"
                                                     "%11 = OpTypeBool\n"
                                                     "%15 = OpTypeFloat 32\n"
                                                     "%16 = OpTypeInt 32 0\n"
                                                     "%17 = OpConstant %16 4\n"
                                                     "%18 = OpTypeArray %15 %17\n"
                                                     "%19 = OpTypePointer Output %18\n"
                                                     "%20 = OpVariable %19 Output\n"
                                                     "%21 = OpConstant %15 1\n"
                                                     "%22 = OpTypePointer Output %15\n"
                                                     "%24 = OpConstant %6 1\n"
                                                     "%26 = OpTypeVector %15 4\n"
                                                     "%27 = OpConstant %16 1\n"
                                                     "%28 = OpTypeArray %15 %27\n"
                                                     "%29 = OpTypeStruct %26 %15 %28 %28\n"
                                                     "%30 = OpConstant %16 2\n"
                                                     "%31 = OpTypeArray %29 %30\n"
                                                     "%32 = OpTypePointer Output %31\n"
                                                     "%33 = OpVariable %32 Output\n"
                                                     "%35 = OpTypeStruct %26 %15 %28 %28\n"
                                                     "%36 = OpConstant %16 32\n"
                                                     "%37 = OpTypeArray %35 %36\n"
                                                     "%38 = OpTypePointer Input %37\n"
                                                     "%39 = OpVariable %38 Input\n"
                                                     "%41 = OpTypePointer Input %26\n"
                                                     "%44 = OpTypePointer Output %26\n"
                                                     "%4 = OpFunction %2 None %3\n"
                                                     "%5 = OpLabel\n"
                                                     "%9 = OpLoad %6 %8\n"
                                                     "%12 = OpIEqual %11 %9 %10\n"
                                                     "OpSelectionMerge %14 None\n"
                                                     "OpBranchConditional %12 %13 %14\n"
                                                     "%13 = OpLabel\n"
                                                     "%23 = OpAccessChain %22 %20 %10\n"
                                                     "OpStore %23 %21\n"
                                                     "%25 = OpAccessChain %22 %20 %24\n"
                                                     "OpStore %25 %21\n"
                                                     "OpBranch %14\n"
                                                     "%14 = OpLabel\n"
                                                     "%34 = OpLoad %6 %8\n"
                                                     "%40 = OpLoad %6 %8\n"
                                                     "%42 = OpAccessChain %41 %39 %40 %10\n"
                                                     "%43 = OpLoad %26 %42\n"
                                                     "%45 = OpAccessChain %44 %33 %34 %10\n"
                                                     "OpStore %45 %43\n"
                                                     "OpReturn\n"
                                                     "OpFunctionEnd\n";
}
2349
// Registers the pre-compiled SPIR-V tessellation evaluation shader ("tese")
// used by the framebuffer-based subgroup tests. The assembly was generated
// from the GLSL source kept in the comment below: it interpolates the two
// patch endpoints along the isoline via gl_TessCoord.x and forwards the
// per-patch in_color[0] to out_color. The binary is stored as hand-written
// assembly (rather than GLSL) so its exact form is stable across glslang
// versions; do not edit the instruction stream casually.
void vkt::subgroups::setTesEvalShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    /*
    "#extension GL_KHR_shader_subgroup_ballot: enable\n"
    "#extension GL_EXT_tessellation_shader : require\n"
    "layout(isolines, equal_spacing, ccw ) in;\n"
    "layout(location = 0) in float in_color[];\n"
    "layout(location = 0) out float out_color;\n"
    "\n"
    "void main (void)\n"
    "{\n"
    "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
    "  out_color = in_color[0];\n"
    "}\n";
    */
    programCollection.spirvAsmSources.add("tese")
        << "; SPIR-V\n"
           "; Version: 1.3\n"
           "; Generator: Khronos Glslang Reference Front End; 2\n"
           "; Bound: 45\n"
           "; Schema: 0\n"
           "OpCapability Tessellation\n"
           "%1 = OpExtInstImport \"GLSL.std.450\"\n"
           "OpMemoryModel Logical GLSL450\n"
           "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
           "OpExecutionMode %4 Isolines\n"
           "OpExecutionMode %4 SpacingEqual\n"
           "OpExecutionMode %4 VertexOrderCcw\n"
           "OpMemberDecorate %11 0 BuiltIn Position\n"
           "OpMemberDecorate %11 1 BuiltIn PointSize\n"
           "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
           "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
           "OpDecorate %11 Block\n"
           "OpMemberDecorate %16 0 BuiltIn Position\n"
           "OpMemberDecorate %16 1 BuiltIn PointSize\n"
           "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
           "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
           "OpDecorate %16 Block\n"
           "OpDecorate %29 BuiltIn TessCoord\n"
           "OpDecorate %39 Location 0\n"
           "OpDecorate %42 Location 0\n"
           "%2 = OpTypeVoid\n"
           "%3 = OpTypeFunction %2\n"
           "%6 = OpTypeFloat 32\n"
           "%7 = OpTypeVector %6 4\n"
           "%8 = OpTypeInt 32 0\n"
           "%9 = OpConstant %8 1\n"
           "%10 = OpTypeArray %6 %9\n"
           "%11 = OpTypeStruct %7 %6 %10 %10\n"
           "%12 = OpTypePointer Output %11\n"
           "%13 = OpVariable %12 Output\n"
           "%14 = OpTypeInt 32 1\n"
           "%15 = OpConstant %14 0\n"
           "%16 = OpTypeStruct %7 %6 %10 %10\n"
           "%17 = OpConstant %8 32\n"
           "%18 = OpTypeArray %16 %17\n"
           "%19 = OpTypePointer Input %18\n"
           "%20 = OpVariable %19 Input\n"
           "%21 = OpTypePointer Input %7\n"
           "%24 = OpConstant %14 1\n"
           "%27 = OpTypeVector %6 3\n"
           "%28 = OpTypePointer Input %27\n"
           "%29 = OpVariable %28 Input\n"
           "%30 = OpConstant %8 0\n"
           "%31 = OpTypePointer Input %6\n"
           "%36 = OpTypePointer Output %7\n"
           "%38 = OpTypePointer Output %6\n"
           "%39 = OpVariable %38 Output\n"
           "%40 = OpTypeArray %6 %17\n"
           "%41 = OpTypePointer Input %40\n"
           "%42 = OpVariable %41 Input\n"
           "%4 = OpFunction %2 None %3\n"
           "%5 = OpLabel\n"
           "%22 = OpAccessChain %21 %20 %15 %15\n"
           "%23 = OpLoad %7 %22\n"
           "%25 = OpAccessChain %21 %20 %24 %15\n"
           "%26 = OpLoad %7 %25\n"
           "%32 = OpAccessChain %31 %29 %30\n"
           "%33 = OpLoad %6 %32\n"
           "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
           "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
           "%37 = OpAccessChain %36 %13 %15\n"
           "OpStore %37 %35\n"
           "%43 = OpAccessChain %31 %42 %15\n"
           "%44 = OpLoad %6 %43\n"
           "OpStore %39 %44\n"
           "OpReturn\n"
           "OpFunctionEnd\n";
}
2439
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2440 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &glslTemplate,
2441 const vk::ShaderBuildOptions &options,
2442 vk::GlslSourceCollection &collection)
2443 {
2444 tcu::StringTemplate geometryTemplate(glslTemplate);
2445
2446 map<string, string> linesParams;
2447 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2448
2449 map<string, string> pointsParams;
2450 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2451
2452 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2453 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2454 }
2455
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2456 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &spirvTemplate,
2457 const vk::SpirVAsmBuildOptions &options,
2458 vk::SpirVAsmCollection &collection)
2459 {
2460 tcu::StringTemplate geometryTemplate(spirvTemplate);
2461
2462 map<string, string> linesParams;
2463 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2464
2465 map<string, string> pointsParams;
2466 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2467
2468 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2469 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2470 }
2471
// Fills a host-visible allocation (backing a UBO/SSBO or an image) with the
// initial contents requested by data.initializeType, then flushes the
// allocation to the device unless the type is InitializeNone.
//   - InitializeNonZero: random values of the format's component width,
//     seeded from the command line's base seed (reproducible runs).
//   - InitializeZero:    all-zero 32-bit words.
//   - InitializeNone:    memory left untouched, no flush.
void initializeMemory(Context &context, const Allocation &alloc, const subgroups::SSBOData &data)
{
    const vk::VkFormat format = data.format;
    // Images are tightly packed per texel; buffers use the std-layout element
    // size (which may include padding, e.g. vec3 in std140).
    const vk::VkDeviceSize size =
        data.numElements * (data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
    if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
    {
        de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());

        // Each case fills the entire allocation with random values whose
        // width matches the format's component type.
        switch (format)
        {
        default:
            DE_FATAL("Illegal buffer format");
            break;
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8B8_SINT:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8B8A8_UINT:
        {
            uint8_t *ptr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint8_t)); k++)
            {
                ptr[k] = rnd.getUint8();
            }
        }
        break;
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16B16A16_UINT:
        {
            uint16_t *ptr = reinterpret_cast<uint16_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint16_t)); k++)
            {
                ptr[k] = rnd.getUint16();
            }
        }
        break;
        case VK_FORMAT_R8_USCALED:
        case VK_FORMAT_R8G8_USCALED:
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8B8A8_USCALED:
        {
            // NOTE(review): the USCALED cases write full 32-bit words that
            // are either 0 or a random value (the low bit of the random word
            // decides) -- presumably these formats carry boolean-like data
            // for the subgroup tests; confirm against the consumers before
            // changing the element width here.
            uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
            {
                uint32_t r = rnd.getUint32();
                ptr[k] = (r & 1) ? r : 0;
            }
        }
        break;
        case VK_FORMAT_R32_SINT:
        case VK_FORMAT_R32G32_SINT:
        case VK_FORMAT_R32G32B32_SINT:
        case VK_FORMAT_R32G32B32A32_SINT:
        case VK_FORMAT_R32_UINT:
        case VK_FORMAT_R32G32_UINT:
        case VK_FORMAT_R32G32B32_UINT:
        case VK_FORMAT_R32G32B32A32_UINT:
        {
            uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
            {
                ptr[k] = rnd.getUint32();
            }
        }
        break;
        case VK_FORMAT_R64_SINT:
        case VK_FORMAT_R64G64_SINT:
        case VK_FORMAT_R64G64B64_SINT:
        case VK_FORMAT_R64G64B64A64_SINT:
        case VK_FORMAT_R64_UINT:
        case VK_FORMAT_R64G64_UINT:
        case VK_FORMAT_R64G64B64_UINT:
        case VK_FORMAT_R64G64B64A64_UINT:
        {
            uint64_t *ptr = reinterpret_cast<uint64_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint64_t)); k++)
            {
                ptr[k] = rnd.getUint64();
            }
        }
        break;
        case VK_FORMAT_R16_SFLOAT:
        case VK_FORMAT_R16G16_SFLOAT:
        case VK_FORMAT_R16G16B16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
        {
            // Random floats converted to fp16 bit patterns.
            float16_t *const ptr = reinterpret_cast<float16_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(float16_t)); k++)
            {
                ptr[k] = tcu::Float16(rnd.getFloat()).bits();
            }
        }
        break;
        case VK_FORMAT_R32_SFLOAT:
        case VK_FORMAT_R32G32_SFLOAT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R32G32B32A32_SFLOAT:
        {
            float *ptr = reinterpret_cast<float *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
            {
                ptr[k] = rnd.getFloat();
            }
        }
        break;
        case VK_FORMAT_R64_SFLOAT:
        case VK_FORMAT_R64G64_SFLOAT:
        case VK_FORMAT_R64G64B64_SFLOAT:
        case VK_FORMAT_R64G64B64A64_SFLOAT:
        {
            double *ptr = reinterpret_cast<double *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
            {
                ptr[k] = rnd.getDouble();
            }
        }
        break;
        }
    }
    else if (subgroups::SSBOData::InitializeZero == data.initializeType)
    {
        // Zero-fill in 32-bit words. NOTE(review): assumes `size` is a
        // multiple of 4 -- a trailing remainder would be left untouched.
        uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

        for (vk::VkDeviceSize k = 0; k < size / 4; k++)
        {
            ptr[k] = 0;
        }
    }

    // Make the written data visible to the device for non-coherent memory.
    if (subgroups::SSBOData::InitializeNone != data.initializeType)
    {
        flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
    }
}
2624
getResultBinding(const VkShaderStageFlagBits shaderStage)2625 uint32_t getResultBinding(const VkShaderStageFlagBits shaderStage)
2626 {
2627 switch (shaderStage)
2628 {
2629 case VK_SHADER_STAGE_VERTEX_BIT:
2630 return 0u;
2631 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2632 return 1u;
2633 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2634 return 2u;
2635 case VK_SHADER_STAGE_GEOMETRY_BIT:
2636 return 3u;
2637 default:
2638 DE_ASSERT(0);
2639 return -1;
2640 }
2641 DE_ASSERT(0);
2642 return -1;
2643 }
2644
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2645 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2646 Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2647 subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage)
2648 {
2649 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2650 context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2651 }
2652
// Framebuffer-based subgroup test for the tessellation stages.
// Draws pairs of vertices as 2-vertex patches across a maxWidth x 1 render
// target for a growing sequence of widths, reads the image back, and lets
// checkResult validate each iteration's pixels.
//
//   extraData/extraDataCount   optional UBO/image inputs bound to shaderStage
//   internalData               opaque pointer forwarded to checkResult
//   shaderStage                tessellation stage(s) under test (tesc/tese)
//   tessShaderStageCreateFlags stage-create flags applied only to the tested
//                              tessellation stage(s)
//   requiredSubgroupSize       when non-zero, forces that subgroup size on the
//                              tested tessellation stage(s)
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
    Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage, const uint32_t tessShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;
    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;
    // The test expects the fixed binary names "vert"/"tesc"/"tese"/"fragment"
    // to be present in the binary collection.
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> teCtrlShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
    const Unique<VkShaderModule> teEvalShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position per vertex.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                       // uint32_t binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX               // VkVertexInputRate inputRate;
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u                             // uint32_t offset;
    };

    // Create and initialize the optional extra inputs (sampled images or
    // uniform buffers).
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // Per-stage required subgroup sizes, indexed vert/tesc/tese/geom/frag.
    // Only the tested tessellation stage(s) get a forced size.
    const uint32_t requiredSubgroupSizes[5] = {
        0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u), 0u, 0u};

    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout,
        VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
        *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
        VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u,
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u), 0u, 0u,
        requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    // Descriptor pool/set only exist when there are extra inputs to bind.
    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    // Point each binding at its backing image or buffer.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const uint32_t subgroupSize = getSubgroupSize(context);
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    // Two vertices per patch, sized for the widest iteration.
    const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    {
        // Build the patch endpoints: each vertex pair spans one pixel-wide
        // segment in NDC x, tiling [-1, 1] across maxWidth pixels.
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < data.size(); ndx += 2u)
        {
            data[ndx][0] = leftHandPosition;
            leftHandPosition += pixelSize;
            data[ndx + 1][0] = leftHandPosition;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    // Re-record, submit and validate once per tested width.
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        beginCommandBuffer(vk, *cmdBuffer);
        {

            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, DE_NULL);
            }

            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
            // Two vertices per patch -> 2 * width vertices draws width patches.
            vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

            endRenderPass(vk, *cmdBuffer);

            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            // NOTE(review): only width / 2 pixels are handed to checkResult
            // here (0 on the first iteration) -- confirm this matches the
            // checkResult implementations' expectations.
            if (!checkResult(internalData, datas, width / 2u, subgroupSize))
                failedIterations++;
        }
    }

    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
2854
check(std::vector<const void * > datas,uint32_t width,uint32_t ref)2855 bool vkt::subgroups::check(std::vector<const void *> datas, uint32_t width, uint32_t ref)
2856 {
2857 const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
2858
2859 for (uint32_t n = 0; n < width; ++n)
2860 {
2861 if (data[n] != ref)
2862 {
2863 return false;
2864 }
2865 }
2866
2867 return true;
2868 }
2869
checkComputeOrMesh(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t ref)2870 bool vkt::subgroups::checkComputeOrMesh(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
2871 const uint32_t localSize[3], uint32_t ref)
2872 {
2873 const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
2874 const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
2875 const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
2876
2877 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2878 }
2879
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2880 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(Context &context, VkFormat format,
2881 const SSBOData *extraData, uint32_t extraDataCount,
2882 const void *internalData,
2883 subgroups::CheckResult checkResult)
2884 {
2885 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
2886 checkResult, 0u, 0u);
2887 }
2888
// Framebuffer-based subgroup test for the geometry stage.
// Draws `width` points (one per pixel column) into a maxWidth x 1 render
// target for a growing sequence of widths, reads the image back, and lets
// checkResult validate each iteration's pixels.
//
//   extraData/extraDataCount      optional UBO/image inputs bound to the
//                                 geometry stage
//   internalData                  opaque pointer forwarded to checkResult
//   geometryShaderStageCreateFlags stage-create flags for the geometry stage
//   requiredSubgroupSize          when non-zero, forces that subgroup size on
//                                 the geometry stage
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
    Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const uint32_t geometryShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;
    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;
    // Expects the fixed binary names "vert"/"geometry"/"fragment".
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> geometryShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position per vertex.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                       // uint32_t binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX               // VkVertexInputRate inputRate;
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u                             // uint32_t offset;
    };

    // Create and initialize the optional extra inputs (sampled images or
    // uniform buffers).
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // Per-stage required subgroup sizes, indexed vert/tesc/tese/geom/frag;
    // only the geometry slot is populated.
    const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};

    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout,
        VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT, *vertexShaderModule,
        *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        &vertexInputBinding, &vertexInputAttribute, true, format, 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
        requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    // Descriptor pool/set only exist when there are extra inputs to bind.
    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    // Point each binding at its backing image or buffer.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const uint32_t subgroupSize = getSubgroupSize(context);
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    // One point vertex per pixel column at maximum width.
    const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    {
        // Place one point at the center of each pixel column across [-1, 1].
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
        {
            data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
            leftHandPosition += pixelSize;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    // Re-record, submit and validate once per tested width.
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        // Unlike the tessellation variant above, the extra inputs are
        // re-initialized (re-randomized/zeroed) every iteration.
        for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        {
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();
            initializeMemory(context, alloc, extraData[ndx]);
        }

        beginCommandBuffer(vk, *cmdBuffer);
        {
            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, DE_NULL);
            }

            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

            vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

            endRenderPass(vk, *cmdBuffer);

            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            if (!checkResult(internalData, datas, width, subgroupSize))
                failedIterations++;
        }
    }

    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
3091
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3092 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages(Context &context,
3093 const vk::VkShaderStageFlags testedStages)
3094 {
3095 const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
3096 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
3097
3098 DE_ASSERT(isAllGraphicsStages(testedStages));
3099
3100 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3101 {
3102 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3103 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3104 else
3105 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3106 }
3107
3108 if (static_cast<VkShaderStageFlags>(0u) == stages)
3109 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3110
3111 return stages;
3112 }
3113
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3114 tcu::TestStatus vkt::subgroups::allStages(Context &context, vk::VkFormat format, const SSBOData *extraData,
3115 uint32_t extraDataCount, const void *internalData,
3116 const VerificationFunctor &checkResult,
3117 const vk::VkShaderStageFlags shaderStage)
3118 {
3119 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3120 checkResult, shaderStage, 0u, 0u, 0u, 0u, 0u, DE_NULL);
3121 }
3122
// Runs a subgroup test across an arbitrary combination of graphics stages.
//
// For every tested stage a result SSBO is bound; stages that are required only
// to complete the pipeline (e.g. a vertex stage when only tessellation is
// tested) use "*_noSubgroup" shader variants that do not write results.
// The test draws 1..getMaxWidth() points (see getNextWidth for the stride),
// then validates each stage's SSBO (and, for fragment, the color attachment
// contents) with the caller-provided VerificationFunctor.
//
// Parameters:
//   extraDatas / extraDatasCount  - additional input buffers/images, bound per
//                                   SSBOData::stages and SSBOData::binding.
//   internalData                  - opaque pointer forwarded to checkResult.
//   *ShaderStageCreateFlags       - VkPipelineShaderStageCreateFlags per stage.
//   requiredSubgroupSize          - optional per-stage required subgroup sizes
//                                   (array of 5, may be DE_NULL).
tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
    Context &context, vk::VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount,
    const void *internalData, const VerificationFunctor &checkResult, const vk::VkShaderStageFlags shaderStageTested,
    const uint32_t vertexShaderStageCreateFlags, const uint32_t tessellationControlShaderStageCreateFlags,
    const uint32_t tessellationEvalShaderStageCreateFlags, const uint32_t geometryShaderStageCreateFlags,
    const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize[5])
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    vector<VkShaderStageFlagBits> stagesVector;
    // Stages not tested but needed to form a valid pipeline for the tested ones.
    VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;

    Move<VkShaderModule> vertexShaderModule;
    Move<VkShaderModule> teCtrlShaderModule;
    Move<VkShaderModule> teEvalShaderModule;
    Move<VkShaderModule> geometryShaderModule;
    Move<VkShaderModule> fragmentShaderModule;

    // Collect the tested stages and compute which extra stages each of them
    // drags in (tessellation needs both tesc+tese and a vertex stage; geometry
    // and fragment need at least a vertex stage).
    if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
    {
        stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
    }
    if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
    {
        stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
    }
    if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
    {
        stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
    }
    if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
    {
        stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
        const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
    }
    if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
    {
        // Note: fragment is deliberately NOT pushed into stagesVector; its
        // results are read back from the color attachment instead of an SSBO.
        const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
    }

    const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
    // Stages that are only required (not tested) use the "_noSubgroup" shader
    // variants, which pass data through without performing subgroup ops.
    const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
    const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
    const string tese =
        (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";

    // From here on, shaderStageRequired is the full set of pipeline stages.
    shaderStageRequired = shaderStageTested | shaderStageRequired;

    vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
    if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
    {
        teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
        teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
    }
    if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
    {
        if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
        {
            // tessellation shaders output line primitives
            geometryShaderModule =
                createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
        }
        else
        {
            // otherwise points are processed by geometry shader
            geometryShaderModule =
                createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
        }
    }
    if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
        fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);

    // Layout: one result buffer per tested stage, followed by the extra datas.
    std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(stagesCount + extraDatasCount);

    DescriptorSetLayoutBuilder layoutBuilder;

    // The implicit result SSBO we use to store our outputs from the shader
    for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
    {
        // The tese shader writes two results per point (it runs on a line),
        // so its result buffer holds twice as many elements.
        const VkDeviceSize shaderSize =
            (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
        const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
        inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

        layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx],
                                        getResultBinding(stagesVector[ndx]), DE_NULL);
    }

    // Create and initialize the caller-supplied extra inputs (UBO/SSBO/image).
    for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
    {
        const uint32_t datasNdx = ndx - stagesCount;
        if (extraDatas[datasNdx].isImage())
        {
            inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(
                context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
        }
        else
        {
            const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
                                                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
            const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
                              extraDatas[datasNdx].numElements;
            inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
        }

        const Allocation &alloc = inputBuffers[ndx]->getAllocation();
        initializeMemory(context, alloc, extraDatas[datasNdx]);

        layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, extraDatas[datasNdx].stages,
                                        extraDatas[datasNdx].binding, DE_NULL);
    }

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // Patch lists are required when tessellation is active; otherwise one
    // point per invocation is drawn.
    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout, shaderStageRequired, *vertexShaderModule, *fragmentShaderModule,
        *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
        (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST :
                                                                           VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT, vertexShaderStageCreateFlags,
        tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
        geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));

    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;

    if (inputBuffers.size() > 0)
    {
        DescriptorPoolBuilder poolBuilder;

        for (uint32_t ndx = 0u; ndx < static_cast<uint32_t>(inputBuffers.size()); ndx++)
        {
            poolBuilder.addType(inputBuffers[ndx]->getType());
        }

        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

        // Create descriptor set
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

        DescriptorSetUpdateBuilder updateBuilder;

        for (uint32_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
        {
            // Result buffers use the per-stage result binding; extras use the
            // binding the caller specified in SSBOData.
            uint32_t binding;
            if (ndx < stagesCount)
                binding = getResultBinding(stagesVector[ndx]);
            else
                binding = extraDatas[ndx - stagesCount].binding;

            if (inputBuffers[ndx]->isImage())
            {
                VkDescriptorImageInfo info =
                    makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
                                            inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

                updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
                                          inputBuffers[ndx]->getType(), &info);
            }
            else
            {
                VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
                    inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());

                updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
                                          inputBuffers[ndx]->getType(), &info);
            }
        }

        updateBuilder.update(vk, device);
    }

    {
        const VkQueue queue = context.getUniversalQueue();
        const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const uint32_t subgroupSize = getSubgroupSize(context);
        const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
        unsigned totalIterations = 0u;
        unsigned failedIterations = 0u;
        Image resultImage(context, maxWidth, 1, format,
                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
        const Unique<VkFramebuffer> framebuffer(
            makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
        const VkViewport viewport = makeViewport(maxWidth, 1u);
        const VkRect2D scissor = makeRect2D(maxWidth, 1u);
        const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
        Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const VkImageSubresourceRange subresourceRange = {
            VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
            0u,                        //uint32_t baseMipLevel
            1u,                        //uint32_t levelCount
            0u,                        //uint32_t baseArrayLayer
            1u                         //uint32_t layerCount
        };

        // Transition the color attachment from UNDEFINED before first use.
        const VkImageMemoryBarrier colorAttachmentBarrier =
            makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                                   VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, resultImage.getImage(), subresourceRange);

        // One draw per width; each iteration re-records and resubmits the
        // command buffer and validates every tested stage's output.
        for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
        {
            for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
            {
                // re-init the data
                const Allocation &alloc = inputBuffers[ndx]->getAllocation();
                initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
            }

            totalIterations++;

            beginCommandBuffer(vk, *cmdBuffer);

            vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u,
                                  (const VkMemoryBarrier *)DE_NULL, 0u, (const VkBufferMemoryBarrier *)DE_NULL, 1u,
                                  &colorAttachmentBarrier);

            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (stagesCount + extraDatasCount > 0)
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, DE_NULL);

            vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);

            endRenderPass(vk, *cmdBuffer);

            copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);

            // Validate the SSBO results of every non-fragment tested stage.
            for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
            {
                std::vector<const void *> datas;
                if (!inputBuffers[ndx]->isImage())
                {
                    const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
                    invalidateAlloc(vk, device, resultAlloc);
                    // we always have our result data first
                    datas.push_back(resultAlloc.getHostPtr());
                }

                // Append any extra buffers visible to this stage, in order.
                for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
                {
                    const uint32_t datasNdx = index - stagesCount;
                    if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
                    {
                        const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
                        invalidateAlloc(vk, device, resultAlloc);
                        // we always have our result data first
                        datas.push_back(resultAlloc.getHostPtr());
                    }
                }

                // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
                const bool multiCall = (stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT);
                // tese produces two results per drawn point (see buffer sizing above).
                const uint32_t usedWidth =
                    ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);

                if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
                    failedIterations++;
            }
            // Fragment results come from the copied color attachment instead of an SSBO.
            if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
            {
                std::vector<const void *> datas;
                const Allocation &resultAlloc = imageBufferResult.getAllocation();
                invalidateAlloc(vk, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());

                for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
                {
                    const uint32_t datasNdx = index - stagesCount;
                    if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
                    {
                        const Allocation &alloc = inputBuffers[index]->getAllocation();
                        invalidateAlloc(vk, device, alloc);
                        // we always have our result data first
                        datas.push_back(alloc.getHostPtr());
                    }
                }

                if (!checkResult(internalData, datas, width, subgroupSize, false))
                    failedIterations++;
            }

            context.resetCommandPoolForVKSC(device, *cmdPool);
        }

        if (0 < failedIterations)
        {
            unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

            context.getTestContext().getLog() << TestLog::Message << valuesPassed << " / " << totalIterations
                                              << " values passed" << TestLog::EndMessage;

            return tcu::TestStatus::fail("Failed!");
        }
    }

    return tcu::TestStatus::pass("OK");
}
3458
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3459 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context &context, vk::VkFormat format,
3460 const SSBOData *extraData, uint32_t extraDataCount,
3461 const void *internalData, subgroups::CheckResult checkResult)
3462 {
3463 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3464 checkResult, 0u, 0u);
3465 }
3466
// Runs a vertex-shader subgroup test that reports results through the
// framebuffer: one point is drawn per invocation across widths
// 1..getMaxWidth(), the vertex shader encodes its result into the point's
// color, and the rendered row is copied back and handed to checkResult.
//
// extraData entries must be UBOs or images (asserted below); they are bound
// to the vertex stage at consecutive bindings starting from 0.
// vertexShaderStageCreateFlags / requiredSubgroupSize feed the pipeline's
// vertex stage (requiredSubgroupSize == 0 means "driver's choice").
tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(
    Context &context, vk::VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const uint32_t vertexShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t maxWidth = getMaxWidth();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position per vertex, fetched per-vertex from binding 0.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                      // binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // stride;
        VK_VERTEX_INPUT_RATE_VERTEX              // inputRate
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT, 0u};

    // Create and initialize the caller-supplied inputs (UBO or image only).
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // Index 0 of the size array corresponds to the vertex stage.
    const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
        *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        &vertexInputBinding, &vertexInputAttribute, true, format, vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
        requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;

    for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;

    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
    {
        const Allocation &alloc = inputBuffers[ndx]->getAllocation();
        initializeMemory(context, alloc, extraData[ndx]);
    }

    // Write the descriptor for every input; binding index == buffer index.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }
    updateBuilder.update(vk, device);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));

    const uint32_t subgroupSize = getSubgroupSize(context);

    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));

    const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;

    // The rendered image itself is not validated directly; results are read
    // back through the copy below, hence "discardable".
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    // Fill the vertex buffer with one point per pixel, centered in NDC.
    {
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
        {
            data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
            leftHandPosition += pixelSize;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    // One draw per width; re-record, submit, copy back and validate.
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        // re-init the extra data for this iteration
        for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        {
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();
            initializeMemory(context, alloc, extraData[ndx]);
        }

        beginCommandBuffer(vk, *cmdBuffer);
        {
            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, DE_NULL);
            }

            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

            vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

            endRenderPass(vk, *cmdBuffer);

            // Copy the whole row; checkResult only inspects the first `width` pixels.
            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            if (!checkResult(internalData, datas, width, subgroupSize))
                failedIterations++;
        }
    }

    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
3671
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult)3672 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(Context &context, VkFormat format,
3673 const SSBOData *extraDatas, uint32_t extraDatasCount,
3674 const void *internalData, CheckResultFragment checkResult)
3675 {
3676 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData,
3677 checkResult, 0u, 0u);
3678 }
3679
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3680 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3681 Context &context, VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount, const void *internalData,
3682 CheckResultFragment checkResult, const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize)
3683 {
3684 const DeviceInterface &vk = context.getDeviceInterface();
3685 const VkDevice device = context.getDevice();
3686 const VkQueue queue = context.getUniversalQueue();
3687 const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3688 const Unique<VkShaderModule> vertexShaderModule(
3689 createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3690 const Unique<VkShaderModule> fragmentShaderModule(
3691 createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3692 std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDatasCount);
3693
3694 for (uint32_t i = 0; i < extraDatasCount; i++)
3695 {
3696 if (extraDatas[i].isImage())
3697 {
3698 inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3699 new Image(context, static_cast<uint32_t>(extraDatas[i].numElements), 1, extraDatas[i].format));
3700 }
3701 else
3702 {
3703 DE_ASSERT(extraDatas[i].isUBO());
3704
3705 const vk::VkDeviceSize size =
3706 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3707
3708 inputBuffers[i] =
3709 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3710 }
3711
3712 const Allocation &alloc = inputBuffers[i]->getAllocation();
3713
3714 initializeMemory(context, alloc, extraDatas[i]);
3715 }
3716
3717 DescriptorSetLayoutBuilder layoutBuilder;
3718
3719 for (uint32_t i = 0; i < extraDatasCount; i++)
3720 {
3721 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3722 }
3723
3724 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3725 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3726 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3727 const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3728 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3729 context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3730 *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, DE_NULL,
3731 DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT, 0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags,
3732 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3733 DescriptorPoolBuilder poolBuilder;
3734
3735 // To stop validation complaining, always add at least one type to pool.
3736 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3737 for (uint32_t i = 0; i < extraDatasCount; i++)
3738 {
3739 poolBuilder.addType(inputBuffers[i]->getType());
3740 }
3741
3742 Move<VkDescriptorPool> descriptorPool;
3743 // Create descriptor set
3744 Move<VkDescriptorSet> descriptorSet;
3745
3746 if (extraDatasCount > 0)
3747 {
3748 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3749
3750 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3751 }
3752
3753 DescriptorSetUpdateBuilder updateBuilder;
3754
3755 for (uint32_t i = 0; i < extraDatasCount; i++)
3756 {
3757 if (inputBuffers[i]->isImage())
3758 {
3759 const VkDescriptorImageInfo info =
3760 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3761 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3762
3763 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3764 inputBuffers[i]->getType(), &info);
3765 }
3766 else
3767 {
3768 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3769 inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3770
3771 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3772 inputBuffers[i]->getType(), &info);
3773 }
3774 }
3775
3776 if (extraDatasCount > 0)
3777 updateBuilder.update(vk, device);
3778
3779 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3780 const uint32_t subgroupSize = getSubgroupSize(context);
3781 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3782 unsigned totalIterations = 0;
3783 unsigned failedIterations = 0;
3784
3785 for (uint32_t width = 8; width <= subgroupSize; width *= 2)
3786 {
3787 for (uint32_t height = 8; height <= subgroupSize; height *= 2)
3788 {
3789 totalIterations++;
3790
3791 // re-init the data
3792 for (uint32_t i = 0; i < extraDatasCount; i++)
3793 {
3794 const Allocation &alloc = inputBuffers[i]->getAllocation();
3795
3796 initializeMemory(context, alloc, extraDatas[i]);
3797 }
3798
3799 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3800 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3801 Image resultImage(context, width, height, format,
3802 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3803 Buffer resultBuffer(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3804 const Unique<VkFramebuffer> framebuffer(
3805 makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3806 VkViewport viewport = makeViewport(width, height);
3807 VkRect2D scissor = {{0, 0}, {width, height}};
3808
3809 beginCommandBuffer(vk, *cmdBuffer);
3810
3811 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3812
3813 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3814
3815 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height),
3816 tcu::Vec4(0.0f));
3817
3818 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3819
3820 if (extraDatasCount > 0)
3821 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3822 &descriptorSet.get(), 0u, DE_NULL);
3823
3824 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3825
3826 endRenderPass(vk, *cmdBuffer);
3827
3828 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(),
3829 tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3830 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3831
3832 endCommandBuffer(vk, *cmdBuffer);
3833
3834 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3835
3836 std::vector<const void *> datas;
3837 {
3838 const Allocation &resultAlloc = resultBuffer.getAllocation();
3839 invalidateAlloc(vk, device, resultAlloc);
3840
3841 // we always have our result data first
3842 datas.push_back(resultAlloc.getHostPtr());
3843 }
3844
3845 if (!checkResult(internalData, datas, width, height, subgroupSize))
3846 {
3847 failedIterations++;
3848 }
3849
3850 context.resetCommandPoolForVKSC(device, *cmdPool);
3851 }
3852 }
3853
3854 if (0 < failedIterations)
3855 {
3856 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3857
3858 context.getTestContext().getLog()
3859 << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3860
3861 return tcu::TestStatus::fail("Failed!");
3862 }
3863
3864 return tcu::TestStatus::pass("OK");
3865 }
3866
// Builds a compute pipeline whose local workgroup size is injected through
// specialization constants 0..2 and whose subgroup size can optionally be
// forced via VK_EXT_subgroup_size_control (requiredSubgroupSize != 0).
// basePipelineHandle is only wired into the create info on non-Vulkan-SC
// builds, where pipeline derivatives are used.
Move<VkPipeline> makeComputePipeline(Context &context, const VkPipelineLayout pipelineLayout,
                                     const VkShaderModule shaderModule, const uint32_t pipelineShaderStageFlags,
                                     const uint32_t pipelineCreateFlags, VkPipeline basePipelineHandle,
                                     uint32_t localSizeX, uint32_t localSizeY, uint32_t localSizeZ,
                                     uint32_t requiredSubgroupSize)
{
    // Workgroup dimensions delivered to the shader as specialization data.
    const uint32_t localSize[3] = {localSizeX, localSizeY, localSizeZ};
    // One map entry per dimension: constant ID n reads uint n of the data blob.
    const vk::VkSpecializationMapEntry entries[3] = {
        {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
        {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
        {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
    };
    const vk::VkSpecializationInfo info = {/* mapEntryCount = */ 3,
                                           /* pMapEntries = */ entries,
                                           /* dataSize = */ sizeof(localSize),
                                           /* pData = */ localSize};
    const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
        DE_NULL,                                                                        // void* pNext;
        requiredSubgroupSize // uint32_t requiredSubgroupSize;
    };
    const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
        // Chain the required-subgroup-size struct only when a size was requested.
        (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
        pipelineShaderStageFlags,                                         // VkPipelineShaderStageCreateFlags flags;
        VK_SHADER_STAGE_COMPUTE_BIT,                                      // VkShaderStageFlagBits stage;
        shaderModule,                                                     // VkShaderModule module;
        "main",                                                           // const char* pName;
        &info, // const VkSpecializationInfo* pSpecializationInfo;
    };
    const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                        // const void* pNext;
        pipelineCreateFlags,                            // VkPipelineCreateFlags flags;
        pipelineShaderStageParams,                      // VkPipelineShaderStageCreateInfo stage;
        pipelineLayout,                                 // VkPipelineLayout layout;
#ifndef CTS_USES_VULKANSC
        basePipelineHandle, // VkPipeline basePipelineHandle;
        -1,                 // int32_t basePipelineIndex;
#else
        DE_NULL, // VkPipeline basePipelineHandle;
        0,       // int32_t basePipelineIndex;
#endif // CTS_USES_VULKANSC
    };
    // On Vulkan SC builds basePipelineHandle is not referenced above; this
    // cast silences the unused-parameter warning in that configuration.
    static_cast<void>(basePipelineHandle);

    return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
}
3915
3916 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize,const VkRenderPass renderPass)3917 Move<VkPipeline> makeMeshPipeline(Context &context, const VkPipelineLayout pipelineLayout,
3918 const VkShaderModule taskModule, const VkShaderModule meshModule,
3919 const uint32_t pipelineShaderStageFlags, const uint32_t pipelineCreateFlags,
3920 VkPipeline basePipelineHandle, uint32_t localSizeX, uint32_t localSizeY,
3921 uint32_t localSizeZ, uint32_t requiredSubgroupSize, const VkRenderPass renderPass)
3922 {
3923 const uint32_t localSize[3] = {localSizeX, localSizeY, localSizeZ};
3924 const vk::VkSpecializationMapEntry entries[3] = {
3925 {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3926 {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3927 {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3928 };
3929 const vk::VkSpecializationInfo info = {/* mapEntryCount = */ 3,
3930 /* pMapEntries = */ entries,
3931 /* dataSize = */ sizeof(localSize),
3932 /* pData = */ localSize};
3933 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3934 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3935 DE_NULL, // void* pNext;
3936 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3937 };
3938
3939 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *pSubgroupSizeCreateInfo =
3940 ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3941
3942 std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3943 vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3944 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3945 nullptr, // const void* pNext;
3946 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3947 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
3948 DE_NULL, // VkShaderModule module;
3949 "main", // const char* pName;
3950 &info, // const VkSpecializationInfo* pSpecializationInfo;
3951 };
3952
3953 if (taskModule != DE_NULL)
3954 {
3955 pipelineShaderStageParams.module = taskModule;
3956 pipelineShaderStageParams.pNext = pSubgroupSizeCreateInfo;
3957 pipelineShaderStageParams.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
3958 shaderStageParams.push_back(pipelineShaderStageParams);
3959 }
3960
3961 if (meshModule != DE_NULL)
3962 {
3963 pipelineShaderStageParams.module = meshModule;
3964 pipelineShaderStageParams.pNext = ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
3965 pipelineShaderStageParams.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
3966 shaderStageParams.push_back(pipelineShaderStageParams);
3967 }
3968
3969 const std::vector<VkViewport> viewports(1u, makeViewport(1u, 1u));
3970 const std::vector<VkRect2D> scissors(1u, makeRect2D(1u, 1u));
3971
3972 return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout,
3973 pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3974 }
3975 #endif // CTS_USES_VULKANSC
3976
// Shared driver for compute and mesh/task subgroup tests. For each local size
// in localSizesToTest it builds a pipeline (the first as a base, the rest as
// derivatives of it), dispatches/draws numWorkgroups groups, and passes the
// result buffer (plus any non-image extra inputs) to checkResult. The LAST
// entry of localSizesToTest is never executed: both loops below stop at
// localSizesToTestCount - 1 (see the callers' double-buffering comment).
tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize(
    ComputeLike testType, Context &context, VkFormat format, const vkt::subgroups::SSBOData *inputs,
    uint32_t inputsCount, const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
    const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3], const bool isRequiredSubgroupSize,
    const uint32_t subgroupSize, const uint32_t localSizesToTest[][3], const uint32_t localSizesToTestCount)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context.getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    const VkDeviceSize elementSize = getFormatSizeInBytes(format);
    // Size the result buffer for the largest local size tested: any of the
    // three dimensions may be as large as the maximum subgroup size.
    const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize ? deMax32(subgroupSizeControlProperties.maxSubgroupSize,
                                                                          vkt::subgroups::maxSupportedSubgroupSize()) :
                                                                  vkt::subgroups::maxSupportedSubgroupSize();
    const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
    const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
    Buffer resultBuffer(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(inputsCount);
    // Stage/bind-point selection: compute vs mesh+task. On Vulkan SC builds
    // the mesh alternative does not exist, so 0 is used (mesh is never run there).
    const auto shaderStageFlags =
        ((testType == ComputeLike::COMPUTE) ? VK_SHADER_STAGE_COMPUTE_BIT
#ifndef CTS_USES_VULKANSC
                                            :
                                            (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
#else
                                            :
                                            0);
#endif // CTS_USES_VULKANSC
    const auto pipelineBindPoint =
        ((testType == ComputeLike::COMPUTE) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
    const auto pipelineStage = ((testType == ComputeLike::COMPUTE) ?
                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
#ifndef CTS_USES_VULKANSC
                                    :
                                    (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
#else
                                    :
                                    0);
#endif // CTS_USES_VULKANSC
    const auto renderArea = makeRect2D(1u, 1u);

    // Copy the local sizes into UVec3s so checkResult can receive a pointer below.
    std::vector<tcu::UVec3> usedLocalSizes;
    for (uint32_t i = 0; i < localSizesToTestCount; ++i)
    {
        usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
    }

    // Create and initialize the extra input resources (images, UBOs or SSBOs).
    for (uint32_t i = 0; i < inputsCount; i++)
    {
        if (inputs[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(inputs[i].numElements), 1, inputs[i].format));
        }
        else
        {
            const auto usage =
                (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
            const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
        }

        const Allocation &alloc = inputBuffers[i]->getAllocation();

        initializeMemory(context, alloc, inputs[i]);
    }

    // Binding 0 is the result buffer; bindings 1..inputsCount follow the inputs.
    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addBinding(resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);

    for (uint32_t i = 0; i < inputsCount; i++)
    {
        layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
    }

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    Move<VkShaderModule> compShader;
    Move<VkShaderModule> meshShader;
    Move<VkShaderModule> taskShader;
    const auto &binaries = context.getBinaryCollection();

    if (testType == ComputeLike::COMPUTE)
    {
        compShader = createShaderModule(vk, device, binaries.get("comp"));
    }
    else if (testType == ComputeLike::MESH)
    {
        // The task shader is optional: mesh-only tests provide just "mesh".
        meshShader = createShaderModule(vk, device, binaries.get("mesh"));
        if (binaries.contains("task"))
            taskShader = createShaderModule(vk, device, binaries.get("task"));
    }
    else
    {
        DE_ASSERT(false);
    }

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    DescriptorPoolBuilder poolBuilder;

    poolBuilder.addType(resultBuffer.getType());

    for (uint32_t i = 0; i < inputsCount; i++)
    {
        poolBuilder.addType(inputBuffers[i]->getType());
    }

    const Unique<VkDescriptorPool> descriptorPool(
        poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
    const VkDescriptorBufferInfo resultDescriptorInfo =
        makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
    DescriptorSetUpdateBuilder updateBuilder;

    updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);

    // Write the input resources at bindings i + 1, matching the layout above.
    for (uint32_t i = 0; i < inputsCount; i++)
    {
        if (inputBuffers[i]->isImage())
        {
            const VkDescriptorImageInfo info =
                makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
                                        inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
                                      inputBuffers[i]->getType(), &info);
        }
        else
        {
            vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
                                      inputBuffers[i]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    unsigned totalIterations = 0;
    unsigned failedIterations = 0;
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
    const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;

    // Mesh tests need a (empty, 1x1) render pass instance to draw inside.
    if (testType == ComputeLike::MESH)
    {
        renderPass = makeRenderPass(vk, device);
        framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width,
                                      renderArea.extent.height);
    }

    // Build the base pipeline (index 0) with ALLOW_DERIVATIVES. The watchdog is
    // touched around each creation so it does not fire while pipelines compile.
    context.getTestContext().touchWatchdog();
    {
        if (testType == ComputeLike::COMPUTE)
        {
            pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
                makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
#ifndef CTS_USES_VULKANSC
                                    VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
#else
                                    0u,
#endif // CTS_USES_VULKANSC
                                    (VkPipeline)DE_NULL, usedLocalSizes[0][0], usedLocalSizes[0][1],
                                    usedLocalSizes[0][2], reqSubgroupSize)));
        }
#ifndef CTS_USES_VULKANSC
        else if (testType == ComputeLike::MESH)
        {
            pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
                context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
                VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, DE_NULL, usedLocalSizes[0][0], usedLocalSizes[0][1],
                usedLocalSizes[0][2], reqSubgroupSize, renderPass.get())));
        }
#endif // CTS_USES_VULKANSC
        else
        {
            DE_ASSERT(false);
        }
    }
    context.getTestContext().touchWatchdog();

    // Build the remaining pipelines (1 .. count-2) as derivatives of the base.
    for (uint32_t index = 1; index < (localSizesToTestCount - 1); index++)
    {
        const uint32_t nextX = usedLocalSizes[index][0];
        const uint32_t nextY = usedLocalSizes[index][1];
        const uint32_t nextZ = usedLocalSizes[index][2];

        context.getTestContext().touchWatchdog();
        {
            if (testType == ComputeLike::COMPUTE)
            {
                pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
                    makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
#ifndef CTS_USES_VULKANSC
                                        VK_PIPELINE_CREATE_DERIVATIVE_BIT,
#else
                                        0u,
#endif // CTS_USES_VULKANSC
                                        **pipelines[0], nextX, nextY, nextZ, reqSubgroupSize)));
            }
#ifndef CTS_USES_VULKANSC
            else if (testType == ComputeLike::MESH)
            {
                pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
                    context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
                    VK_PIPELINE_CREATE_DERIVATIVE_BIT, pipelines[0].get()->get(), nextX, nextY, nextZ, reqSubgroupSize,
                    renderPass.get())));
            }
#endif // CTS_USES_VULKANSC
            else
            {
                DE_ASSERT(false);
            }
        }
        context.getTestContext().touchWatchdog();
    }

    // Execute one iteration per local size (the final entry is skipped on purpose).
    for (uint32_t index = 0; index < (localSizesToTestCount - 1); index++)
    {
        // we are running one test
        totalIterations++;

        beginCommandBuffer(vk, *cmdBuffer);
        {
            if (testType == ComputeLike::MESH)
                beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);

            vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);

            vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u,
                                     DE_NULL);

            if (testType == ComputeLike::COMPUTE)
                vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
#ifndef CTS_USES_VULKANSC
            else if (testType == ComputeLike::MESH)
                vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
            //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
#endif // CTS_USES_VULKANSC
            else
                DE_ASSERT(false);

            if (testType == ComputeLike::MESH)
                endRenderPass(vk, *cmdBuffer);
        }

        // Make shader writes available.
        const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u,
                              nullptr, 0u, nullptr);

        endCommandBuffer(vk, *cmdBuffer);

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        std::vector<const void *> datas;

        {
            const Allocation &resultAlloc = resultBuffer.getAllocation();
            invalidateAlloc(vk, device, resultAlloc);

            // we always have our result data first
            datas.push_back(resultAlloc.getHostPtr());
        }

        // Non-image inputs are also handed to the checker (images are not read back).
        for (uint32_t i = 0; i < inputsCount; i++)
        {
            if (!inputBuffers[i]->isImage())
            {
                const Allocation &resultAlloc = inputBuffers[i]->getAllocation();
                invalidateAlloc(vk, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());
            }
        }

        if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
        {
            failedIterations++;
        }

        context.resetCommandPoolForVKSC(device, *cmdPool);
    }

    // Report how many iterations passed when at least one failed.
    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
4287
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4288 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
4289 Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4290 CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4291 const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4292 const uint32_t localSizesToTestCount)
4293 {
4294 return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::COMPUTE, context, format, inputs, inputsCount,
4295 internalData, checkResult, pipelineShaderStageCreateFlags,
4296 numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4297 localSizesToTest, localSizesToTestCount);
4298 }
4299
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4300 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize(
4301 Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4302 CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4303 const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4304 const uint32_t localSizesToTestCount)
4305 {
4306 return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::MESH, context, format, inputs, inputsCount,
4307 internalData, checkResult, pipelineShaderStageCreateFlags,
4308 numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4309 localSizesToTest, localSizesToTestCount);
4310 }
4311
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4312 tcu::TestStatus makeComputeOrMeshTest(ComputeLike testType, Context &context, VkFormat format,
4313 const vkt::subgroups::SSBOData *inputs, uint32_t inputsCount,
4314 const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
4315 uint32_t requiredSubgroupSize, const uint32_t pipelineShaderStageCreateFlags)
4316 {
4317 const uint32_t numWorkgroups[3] = {4, 2, 2};
4318 const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4319 const uint32_t subgroupSize =
4320 (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4321
4322 const uint32_t localSizesToTestCount = 8;
4323 uint32_t localSizesToTest[localSizesToTestCount][3] = {
4324 {1, 1, 1}, {subgroupSize, 1, 1}, {1, subgroupSize, 1}, {1, 1, subgroupSize}, {32, 4, 1}, {1, 4, 32}, {3, 5, 7},
4325 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4326 };
4327
4328 if (testType == ComputeLike::COMPUTE)
4329 return makeComputeTestRequiredSubgroupSize(
4330 context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4331 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4332 else
4333 return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult,
4334 pipelineShaderStageCreateFlags, numWorkgroups, isRequiredSubgroupSize,
4335 subgroupSize, localSizesToTest, localSizesToTestCount);
4336 }
4337
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4338 tcu::TestStatus vkt::subgroups::makeComputeTest(Context &context, VkFormat format, const SSBOData *inputs,
4339 uint32_t inputsCount, const void *internalData,
4340 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4341 const uint32_t pipelineShaderStageCreateFlags)
4342 {
4343 return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult,
4344 requiredSubgroupSize, pipelineShaderStageCreateFlags);
4345 }
4346
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4347 tcu::TestStatus vkt::subgroups::makeMeshTest(Context &context, VkFormat format, const SSBOData *inputs,
4348 uint32_t inputsCount, const void *internalData,
4349 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4350 const uint32_t pipelineShaderStageCreateFlags)
4351 {
4352 return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult,
4353 requiredSubgroupSize, pipelineShaderStageCreateFlags);
4354 }
4355
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4356 static inline void checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)
4357 {
4358 if (shaderStages == 0)
4359 TCU_THROW(InternalError, "Shader stage is not specified");
4360
4361 // It can actually be only 1 or 0.
4362 const uint32_t exclusivePipelinesCount =
4363 (isAllComputeStages(shaderStages) ? 1 : 0) + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4364 #ifndef CTS_USES_VULKANSC
4365 + (isAllRayTracingStages(shaderStages) ? 1 : 0) + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4366 #endif // CTS_USES_VULKANSC
4367 ;
4368
4369 if (exclusivePipelinesCount != 1)
4370 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4371 }
4372
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4373 void vkt::subgroups::supportedCheckShader(Context &context, const VkShaderStageFlags shaderStages)
4374 {
4375 checkShaderStageSetValidity(shaderStages);
4376
4377 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4378 {
4379 if (isAllComputeStages(shaderStages))
4380 TCU_FAIL("Compute shader is required to support subgroup operations");
4381 else
4382 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4383 }
4384
4385 #ifndef CTS_USES_VULKANSC
4386 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4387 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4388 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4389 {
4390 TCU_THROW(NotSupportedError,
4391 "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4392 }
4393 #endif // CTS_USES_VULKANSC
4394 }
4395
4396 namespace vkt
4397 {
4398 namespace subgroups
4399 {
4400 typedef std::vector<de::SharedPtr<BufferOrImage>> vectorBufferOrImage;
4401
// Shader group indices used by the ray tracing tests below when assembling
// pipelines and shader binding tables.
enum ShaderGroups
{
    FIRST_GROUP = 0,
    RAYGEN_GROUP = FIRST_GROUP, // ray generation group shares index 0
    MISS_GROUP,
    HIT_GROUP,
    CALL_GROUP,
    GROUP_COUNT // total number of groups, not a real group index
};
4411
getAllRayTracingFormats()4412 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4413 {
4414 std::vector<VkFormat> formats;
4415
4416 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4417 formats.push_back(VK_FORMAT_R8_UINT);
4418 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4419 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4420 formats.push_back(VK_FORMAT_R16_UINT);
4421 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4422 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4423 formats.push_back(VK_FORMAT_R32_UINT);
4424 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4425 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4426 formats.push_back(VK_FORMAT_R64_UINT);
4427 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4428 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4429 formats.push_back(VK_FORMAT_R32_SFLOAT);
4430 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4431 formats.push_back(VK_FORMAT_R64_SFLOAT);
4432 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4433 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4434 formats.push_back(VK_FORMAT_R8_USCALED);
4435 formats.push_back(VK_FORMAT_R8G8_USCALED);
4436 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4437 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4438
4439 return formats;
4440 }
4441
// Registers a set of "*_noSubgroup" placeholder shaders (one per ray tracing
// stage) that perform no subgroup operations. These fill the stages a given
// test is not exercising. All are built with SPIR-V 1.4 as required for
// GL_EXT_ray_tracing.
void addRayTracingNoSubgroupShader(SourceCollections &programCollection)
{
    const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);

    // Raygen: traces one guaranteed hit and one guaranteed miss, then invokes
    // the callable shader, so every other stage actually executes.
    const std::string rgenShaderNoSubgroups =
        "#version 460 core\n"
        "#extension GL_EXT_ray_tracing: require\n"
        "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
        "layout(location = 0) callableDataEXT uvec4 callData;"
        "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
        "\n"
        "void main()\n"
        "{\n"
        "  uint  rayFlags = 0;\n"
        "  uint  cullMask = 0xFF;\n"
        "  float tmin     = 0.0;\n"
        "  float tmax     = 9.0;\n"
        "  vec3  origin   = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
        "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
        "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
        "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
        "\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
        "  executeCallableEXT(0, 0);"
        "}\n";
    // Hit shader body used for both any-hit and closest-hit: empty on purpose.
    const std::string hitShaderNoSubgroups = "#version 460 core\n"
                                             "#extension GL_EXT_ray_tracing: require\n"
                                             "hitAttributeEXT vec3 attribs;\n"
                                             "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
                                             "\n"
                                             "void main()\n"
                                             "{\n"
                                             "}\n";
    const std::string missShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "}\n";
    // Intersection shader: reports a hit so the hit shaders can run.
    const std::string sectShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "hitAttributeEXT vec3 hitAttribute;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
                                              "}\n";
    const std::string callShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "layout(location = 0) callableDataInEXT float callData;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "}\n";

    // The same empty hit shader source backs both the any-hit and closest-hit entries.
    programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource(rgenShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource(hitShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource(hitShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource(missShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("sect_noSubgroup")
        << glu::IntersectionSource(sectShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource(callShaderNoSubgroups) << buildOptions;
}
4507
4508 #ifndef CTS_USES_VULKANSC
4509
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4510 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)
4511 {
4512 vector<VkShaderStageFlagBits> result;
4513 const VkShaderStageFlagBits shaderStageFlags[] = {
4514 VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4515 VK_SHADER_STAGE_MISS_BIT_KHR, VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4516 };
4517
4518 for (auto shaderStageFlag : shaderStageFlags)
4519 {
4520 if (0 != (shaderStage & shaderStageFlag))
4521 result.push_back(shaderStageFlag);
4522 }
4523
4524 return result;
4525 }
4526
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4527 static uint32_t getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)
4528 {
4529 const VkShaderStageFlags shaderStageFlags[] = {
4530 VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4531 VK_SHADER_STAGE_MISS_BIT_KHR, VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4532 };
4533
4534 for (uint32_t shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4535 {
4536 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4537 {
4538 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4539
4540 return shaderStageNdx;
4541 }
4542 }
4543
4544 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4545 }
4546
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4547 static vectorBufferOrImage makeRayTracingInputBuffers(Context &context, VkFormat format, const SSBOData *extraDatas,
4548 uint32_t extraDatasCount,
4549 const vector<VkShaderStageFlagBits> &stagesVector)
4550 {
4551 const size_t stagesCount = stagesVector.size();
4552 const VkDeviceSize shaderSize = getMaxWidth();
4553 const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4554 vectorBufferOrImage inputBuffers(stagesCount + extraDatasCount);
4555
4556 // The implicit result SSBO we use to store our outputs from the shader
4557 for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4558 inputBuffers[stageNdx] =
4559 de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4560
4561 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4562 {
4563 const size_t datasNdx = stageNdx - stagesCount;
4564
4565 if (extraDatas[datasNdx].isImage())
4566 {
4567 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(
4568 context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4569 }
4570 else
4571 {
4572 const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
4573 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4574 const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
4575 extraDatas[datasNdx].numElements;
4576 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4577 }
4578
4579 initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4580 }
4581
4582 return inputBuffers;
4583 }
4584
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4585 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout(Context &context, const SSBOData *extraDatas,
4586 uint32_t extraDatasCount,
4587 const vector<VkShaderStageFlagBits> &stagesVector,
4588 const vectorBufferOrImage &inputBuffers)
4589 {
4590 const DeviceInterface &vkd = context.getDeviceInterface();
4591 const VkDevice device = context.getDevice();
4592 const size_t stagesCount = stagesVector.size();
4593 DescriptorSetLayoutBuilder layoutBuilder;
4594
4595 // The implicit result SSBO we use to store our outputs from the shader
4596 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4597 {
4598 const uint32_t stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4599
4600 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding,
4601 DE_NULL);
4602 }
4603
4604 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4605 {
4606 const size_t datasNdx = stageNdx - stagesCount;
4607
4608 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages,
4609 extraDatas[datasNdx].binding, DE_NULL);
4610 }
4611
4612 return layoutBuilder.build(vkd, device);
4613 }
4614
makeRayTracingDescriptorSetLayoutAS(Context & context)4615 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS(Context &context)
4616 {
4617 const DeviceInterface &vkd = context.getDeviceInterface();
4618 const VkDevice device = context.getDevice();
4619 DescriptorSetLayoutBuilder layoutBuilder;
4620
4621 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4622
4623 return layoutBuilder.build(vkd, device);
4624 }
4625
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4626 static Move<VkDescriptorPool> makeRayTracingDescriptorPool(Context &context, const vectorBufferOrImage &inputBuffers)
4627 {
4628 const DeviceInterface &vkd = context.getDeviceInterface();
4629 const VkDevice device = context.getDevice();
4630 const uint32_t maxDescriptorSets = 2u;
4631 DescriptorPoolBuilder poolBuilder;
4632 Move<VkDescriptorPool> result;
4633
4634 if (inputBuffers.size() > 0)
4635 {
4636 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4637 poolBuilder.addType(inputBuffers[ndx]->getType());
4638 }
4639
4640 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4641
4642 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4643
4644 return result;
4645 }
4646
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4647 static Move<VkDescriptorSet> makeRayTracingDescriptorSet(Context &context, VkDescriptorPool descriptorPool,
4648 VkDescriptorSetLayout descriptorSetLayout,
4649 const SSBOData *extraDatas, uint32_t extraDatasCount,
4650 const vector<VkShaderStageFlagBits> &stagesVector,
4651 const vectorBufferOrImage &inputBuffers)
4652 {
4653 const DeviceInterface &vkd = context.getDeviceInterface();
4654 const VkDevice device = context.getDevice();
4655 const size_t stagesCount = stagesVector.size();
4656 Move<VkDescriptorSet> descriptorSet;
4657
4658 if (inputBuffers.size() > 0)
4659 {
4660 DescriptorSetUpdateBuilder updateBuilder;
4661
4662 // Create descriptor set
4663 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4664
4665 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4666 {
4667 const uint32_t binding = (ndx < stagesCount) ? getRayTracingResultBinding(stagesVector[ndx]) :
4668 extraDatas[ndx - stagesCount].binding;
4669
4670 if (inputBuffers[ndx]->isImage())
4671 {
4672 const VkDescriptorImageInfo info =
4673 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
4674 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4675
4676 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4677 inputBuffers[ndx]->getType(), &info);
4678 }
4679 else
4680 {
4681 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
4682 inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4683
4684 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4685 inputBuffers[ndx]->getType(), &info);
4686 }
4687 }
4688
4689 updateBuilder.update(vkd, device);
4690 }
4691
4692 return descriptorSet;
4693 }
4694
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4695 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS(
4696 Context &context, VkDescriptorPool descriptorPool, VkDescriptorSetLayout descriptorSetLayout,
4697 de::MovePtr<TopLevelAccelerationStructure> &topLevelAccelerationStructure)
4698 {
4699 const DeviceInterface &vkd = context.getDeviceInterface();
4700 const VkDevice device = context.getDevice();
4701 const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4702 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
4703 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4704 DE_NULL, // const void* pNext;
4705 1u, // uint32_t accelerationStructureCount;
4706 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4707 };
4708 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4709
4710 DescriptorSetUpdateBuilder()
4711 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4712 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4713 .update(vkd, device);
4714
4715 return descriptorSet;
4716 }
4717
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4718 static Move<VkPipelineLayout> makeRayTracingPipelineLayout(Context &context,
4719 const VkDescriptorSetLayout descriptorSetLayout0,
4720 const VkDescriptorSetLayout descriptorSetLayout1)
4721 {
4722 const DeviceInterface &vkd = context.getDeviceInterface();
4723 const VkDevice device = context.getDevice();
4724 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts{descriptorSetLayout0, descriptorSetLayout1};
4725 const uint32_t descriptorSetLayoutsSize = static_cast<uint32_t>(descriptorSetLayouts.size());
4726
4727 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4728 }
4729
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4730 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure(
4731 Context &context, de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4732 {
4733 const DeviceInterface &vkd = context.getDeviceInterface();
4734 const VkDevice device = context.getDevice();
4735 Allocator &allocator = context.getDefaultAllocator();
4736 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4737
4738 result->setInstanceCount(1);
4739 result->addInstance(bottomLevelAccelerationStructure);
4740 result->create(vkd, device, allocator);
4741
4742 return result;
4743 }
4744
createBottomAccelerationStructure(Context & context)4745 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure(Context &context)
4746 {
4747 const DeviceInterface &vkd = context.getDeviceInterface();
4748 const VkDevice device = context.getDevice();
4749 Allocator &allocator = context.getDefaultAllocator();
4750 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4751 const std::vector<tcu::Vec3> geometryData{tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f)};
4752
4753 result->setGeometryCount(1u);
4754 result->addGeometry(geometryData, false);
4755 result->create(vkd, device, allocator, 0u);
4756
4757 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4758 }
4759
// Build a complete ray tracing pipeline covering all six stage kinds.
// Stages included in shaderStageTested use the subgroup test shaders ("rgen", "ahit", ...);
// every other stage falls back to its minimal "*_noSubgroup" variant so the pipeline stays valid.
// shaderStageCreateFlags and requiredSubgroupSize may each be DE_NULL; otherwise they are
// per-stage arrays indexed in the fixed order rgen, ahit, chit, miss, sect, call (0..5).
// The created VkPipeline is written to pipelineOut; the RayTracingPipeline wrapper is returned
// so the caller can build shader binding tables from its group layout.
static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline(
    Context &context, const VkShaderStageFlags shaderStageTested, const VkPipelineLayout pipelineLayout,
    const uint32_t shaderStageCreateFlags[6], const uint32_t requiredSubgroupSize[6], Move<VkPipeline> &pipelineOut)
{
    const DeviceInterface &vkd = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    BinaryCollection &collection = context.getBinaryCollection();
    // Select the subgroup-test shader for tested stages, the no-op fallback otherwise.
    const char *shaderRgenName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
    const char *shaderAhitName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
    const char *shaderChitName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
    const char *shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
    const char *shaderSectName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
    const char *shaderCallName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
    const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
    Move<VkShaderModule> rgenShaderModule =
        createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
    Move<VkShaderModule> ahitShaderModule =
        createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
    Move<VkShaderModule> chitShaderModule =
        createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
    Move<VkShaderModule> missShaderModule =
        createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
    Move<VkShaderModule> sectShaderModule =
        createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
    Move<VkShaderModule> callShaderModule =
        createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
    // Per-stage pipeline shader stage create flags: 0 when the caller passed no array.
    const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags =
        static_cast<VkPipelineShaderStageCreateFlags>(0);
    const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
    const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
    const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
    const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
    const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
    const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
    // Per-stage required subgroup size structs; a size of 0 means "no requirement" and
    // results in a DE_NULL pointer below, so no pNext chain entry is added for that stage.
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] = {
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            DE_NULL,
            requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
        },
    };
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rgenRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *ahitRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *chitRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *missRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *sectRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *callRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
    de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();

    // Register one shader per stage. Any-hit, closest-hit and intersection share HIT_GROUP.
    rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenShaderModule, RAYGEN_GROUP, DE_NULL,
                                  rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, ahitShaderModule, HIT_GROUP, DE_NULL,
                                  ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitShaderModule, HIT_GROUP, DE_NULL,
                                  chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missShaderModule, MISS_GROUP, DE_NULL,
                                  missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, sectShaderModule, HIT_GROUP, DE_NULL,
                                  sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, callShaderModule, CALL_GROUP, DE_NULL,
                                  callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);

    // Must execute createPipeline here, due to pNext pointers in calls to addShader are local
    pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

    return rayTracingPipeline;
}
4869
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4870 VkShaderStageFlags getPossibleRayTracingSubgroupStages(Context &context, const VkShaderStageFlags testedStages)
4871 {
4872 const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
4873 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4874
4875 DE_ASSERT(isAllRayTracingStages(testedStages));
4876
4877 return stages;
4878 }
4879
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4880 tcu::TestStatus allRayTracingStages(Context &context, VkFormat format, const SSBOData *extraDatas,
4881 uint32_t extraDataCount, const void *internalData,
4882 const VerificationFunctor &checkResult, const VkShaderStageFlags shaderStage)
4883 {
4884 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(
4885 context, format, extraDatas, extraDataCount, internalData, checkResult, shaderStage, DE_NULL, DE_NULL);
4886 }
4887
// Core executor for ray tracing subgroup tests.
// Sets up acceleration structures, per-stage result SSBOs plus extra data resources,
// descriptor sets, the ray tracing pipeline (optionally with per-stage create flags and
// required subgroup sizes, both indexed rgen/ahit/chit/miss/sect/call) and the shader
// binding tables, then for every tested width traces that many rays and lets checkResult
// validate each tested stage's output. Returns pass only if every iteration passed.
tcu::TestStatus allRayTracingStagesRequiredSubgroupSize(Context &context, VkFormat format, const SSBOData *extraDatas,
                                                        uint32_t extraDatasCount, const void *internalData,
                                                        const VerificationFunctor &checkResult,
                                                        const VkShaderStageFlags shaderStageTested,
                                                        const uint32_t shaderStageCreateFlags[6],
                                                        const uint32_t requiredSubgroupSize[6])
{
    const DeviceInterface &vkd = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    Allocator &allocator = context.getDefaultAllocator();
    const uint32_t subgroupSize = getSubgroupSize(context);
    const uint32_t maxWidth = getMaxWidth();
    const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
    const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
    // Host-side AS objects; the device-side builds are recorded into the command buffer below.
    de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
        createBottomAccelerationStructure(context);
    de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure =
        createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
    // inputBuffers layout: [0, stagesCount) result SSBOs, [stagesCount, ...) extra data.
    vectorBufferOrImage inputBuffers =
        makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
    const Move<VkDescriptorSetLayout> descriptorSetLayout =
        makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
    const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
    const Move<VkPipelineLayout> pipelineLayout =
        makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
    Move<VkPipeline> pipeline = Move<VkPipeline>();
    // makeRayTracingPipeline creates the VkPipeline (written to 'pipeline') and returns the
    // wrapper needed to create the shader binding tables.
    const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(
        context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
    const uint32_t shaderGroupHandleSize = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
    const uint32_t shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
    // One shader binding table (with a single group) per group kind.
    de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
    de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
    de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
    de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
    const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
    // Set 0: data buffers/images; set 1: the top-level acceleration structure.
    const Move<VkDescriptorSet> descriptorSet = makeRayTracingDescriptorSet(
        context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
    const Move<VkDescriptorSet> descriptorSetAS =
        makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
    const Move<VkCommandPool> cmdPool = makeCommandPool(vkd, device, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
    uint32_t passIterations = 0u;
    uint32_t failIterations = 0u;

    DE_ASSERT(shaderStageTested != 0);

    // Sweep the dispatch width: every value up to 128, then powers of two (see getNextWidth).
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {

        for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
        {
            // re-init the data
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();

            initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
        }

        beginCommandBuffer(vkd, *cmdBuffer);
        {
            vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

            // Record the device-side acceleration structure builds (bottom first, then top).
            bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
            topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);

            vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u,
                                      &descriptorSetAS.get(), 0u, DE_NULL);

            if (stagesCount + extraDatasCount > 0)
                vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u,
                                          &descriptorSet.get(), 0u, DE_NULL);

            // Launch 'width' rays in a single row.
            cmdTraceRays(vkd, *cmdBuffer, &rgenShaderBindingTableRegion, &missShaderBindingTableRegion,
                         &hitsShaderBindingTableRegion, &callShaderBindingTableRegion, width, 1, 1);

            // Make shader writes visible to the host readback below.
            const VkMemoryBarrier postTraceMemoryBarrier =
                makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
            cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
                                     VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
        }
        endCommandBuffer(vkd, *cmdBuffer);

        submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

        // Verify each tested stage: its result buffer first, then any extra data buffers
        // visible to that stage, in the order checkResult expects.
        for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
        {
            std::vector<const void *> datas;

            if (!inputBuffers[ndx]->isImage())
            {
                const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();

                invalidateAlloc(vkd, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());
            }

            for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
            {
                const uint32_t datasNdx = index - stagesCount;

                if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
                {
                    const Allocation &resultAlloc = inputBuffers[index]->getAllocation();

                    invalidateAlloc(vkd, device, resultAlloc);

                    // we always have our result data first
                    datas.push_back(resultAlloc.getHostPtr());
                }
            }

            if (!checkResult(internalData, datas, width, subgroupSize, false))
                failIterations++;
            else
                passIterations++;
        }

        context.resetCommandPoolForVKSC(device, *cmdPool);
    }

    if (failIterations > 0 || passIterations == 0)
        return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " +
                                     de::toString(failIterations + passIterations) + " iterations.");
    else
        return tcu::TestStatus::pass("OK");
}
5033 #endif // CTS_USES_VULKANSC
5034
5035 } // namespace subgroups
5036 } // namespace vkt
5037