1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2023 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Additional tests for VK_KHR_shader_quad_control
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSubgroupsQuadControlTests.hpp"
25 #include "vkBufferWithMemory.hpp"
26 #include "vkImageWithMemory.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vkBuilderUtil.hpp"
29 #include "vkImageUtil.hpp"
30 #include "vkMemUtil.hpp"
31 #include "vkPrograms.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkDeviceUtil.hpp"
34 #include "vkBarrierUtil.hpp"
35 #include "vkRef.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vkTypeUtil.hpp"
38 #include "vkObjUtil.hpp"
39 #include "vkCmdUtil.hpp"
40 #include "tcuTestLog.hpp"
41 #include "deMath.h"
42 #include "tcuVectorUtil.hpp"
43 #include "deUniquePtr.hpp"
44 #include <vector>
45 
46 namespace vkt
47 {
48 namespace subgroups
49 {
50 
51 using namespace vk;
52 
// Selects which test instance is created and which fragment shader variant is built.
enum class TestMode
{
    QUAD_DERIVATIVES    = 0, // fragment shader declares layout(quad_derivatives)
    REQUIRE_FULL_QUADS  = 1, // fragment shader declares layout(full_quads)
    DIVERGENT_CONDITION = 2, // fallback mode: handled by DivergentConditionInstance
};
59 
60 class DrawWithQuadControlInstanceBase : public vkt::TestInstance
61 {
62 public:
63     DrawWithQuadControlInstanceBase(Context &context, TestMode mode);
64 
65     virtual ~DrawWithQuadControlInstanceBase(void) = default;
66 
67     virtual tcu::TestStatus iterate(void) override;
68 
69 protected:
70     virtual bool isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const = 0;
71 
72     VkImageCreateInfo getImageCreateInfo(VkExtent3D extent, uint32_t mipLevels, VkImageUsageFlags usage) const;
73 
74 protected:
75     const TestMode m_mode;
76     const VkClearColorValue m_mipColors[5];
77     tcu::UVec2 m_renderSize;
78     VkPrimitiveTopology m_topology;
79     std::vector<float> m_vertices;
80 };
81 
DrawWithQuadControlInstanceBase(Context & context,TestMode mode)82 DrawWithQuadControlInstanceBase::DrawWithQuadControlInstanceBase(Context&    context,
83                                                                  TestMode    mode)
84     : vkt::TestInstance        (context)
85     , m_mode                (mode)
86     , m_mipColors
87     {
88         { { 0.9f, 0.4f, 0.2f, 1.0f } }, // orange
89         { { 0.2f, 0.8f, 0.9f, 1.0f } }, // blue
90         { { 0.2f, 0.9f, 0.2f, 1.0f } }, // green
91         { { 0.9f, 0.9f, 0.2f, 1.0f } }, // yellow
92         { { 0.6f, 0.1f, 0.9f, 1.0f } }, // violet
93     }
94     , m_renderSize            (32)
95     , m_topology            (VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
96 {
97 }
98 
getImageCreateInfo(VkExtent3D extent,uint32_t mipLevels,VkImageUsageFlags usage) const99 VkImageCreateInfo DrawWithQuadControlInstanceBase::getImageCreateInfo(VkExtent3D extent, uint32_t mipLevels,
100                                                                       VkImageUsageFlags usage) const
101 {
102     return {
103         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
104         DE_NULL,                             // const void* pNext;
105         0u,                                  // VkImageCreateFlags flags;
106         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
107         VK_FORMAT_R8G8B8A8_UNORM,            // VkFormat format;
108         extent,                              // VkExtent3D extent;
109         mipLevels,                           // uint32_t mipLevels;
110         1u,                                  // uint32_t arrayLayers;
111         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
112         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
113         usage,                               // VkImageUsageFlags usage;
114         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
115         0u,                                  // uint32_t queueFamilyIndexCount;
116         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
117         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
118     };
119 }
120 
iterate(void)121 tcu::TestStatus DrawWithQuadControlInstanceBase::iterate(void)
122 {
123     const DeviceInterface &vk       = m_context.getDeviceInterface();
124     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
125     const VkDevice device           = m_context.getDevice();
126     Allocator &alloc                = m_context.getDefaultAllocator();
127 
128     const VkFormat colorFormat{VK_FORMAT_R8G8B8A8_UNORM};
129     const std::vector<VkViewport> viewports{makeViewport(m_renderSize)};
130     const std::vector<VkRect2D> scissors{makeRect2D(m_renderSize)};
131 
132     DE_ASSERT(!m_vertices.empty()); // derived class should specify vertex in costructor
133     const VkBufferCreateInfo vertexBufferInfo = makeBufferCreateInfo(
134         m_vertices.size() * sizeof(float), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
135     BufferWithMemory vertexBuffer(vk, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
136     deMemcpy(vertexBuffer.getAllocation().getHostPtr(), m_vertices.data(), m_vertices.size() * sizeof(float));
137     flushAlloc(vk, device, vertexBuffer.getAllocation());
138 
139     // create output buffer that will be used to read rendered image
140     const VkDeviceSize outputBufferSize =
141         (VkDeviceSize)m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(colorFormat));
142     const VkBufferCreateInfo outputBufferInfo =
143         makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
144     BufferWithMemory outputBuffer(vk, device, alloc, outputBufferInfo, MemoryRequirement::HostVisible);
145 
146     // create color buffer
147     VkExtent3D colorImageExtent = makeExtent3D(m_renderSize.x(), m_renderSize.y(), 1u);
148     const VkImageCreateInfo colorImageCreateInfo =
149         getImageCreateInfo(colorImageExtent, 1u, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
150     const VkImageSubresourceRange colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
151     ImageWithMemory colorImage(vk, device, alloc, colorImageCreateInfo, MemoryRequirement::Any);
152     Move<VkImageView> colorImageView =
153         makeImageView(vk, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
154 
155     // create image that will be used as a texture
156     uint32_t mipLevels            = DE_LENGTH_OF_ARRAY(m_mipColors);
157     VkExtent3D textureImageExtent = makeExtent3D(16u, 16u, 1u);
158     const VkImageCreateInfo textureImageCreateInfo =
159         getImageCreateInfo(textureImageExtent, mipLevels, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
160     const VkImageSubresourceRange textureSRR =
161         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, mipLevels, 0u, 1u);
162     ImageWithMemory textureImage(vk, device, alloc, textureImageCreateInfo, MemoryRequirement::Any);
163     Move<VkImageView> textureImageView =
164         makeImageView(vk, device, textureImage.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, textureSRR);
165 
166     // create sampler
167     const VkSamplerCreateInfo samplerCreateInfo{
168         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,   // VkStructureType sType;
169         DE_NULL,                                 // const void* pNext;
170         0u,                                      // VkSamplerCreateFlags flags;
171         VK_FILTER_NEAREST,                       // VkFilter magFilter;
172         VK_FILTER_NEAREST,                       // VkFilter minFilter;
173         VK_SAMPLER_MIPMAP_MODE_NEAREST,          // VkSamplerMipmapMode mipmapMode;
174         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeU;
175         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeV;
176         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // VkSamplerAddressMode addressModeW;
177         0.0f,                                    // float mipLodBias;
178         VK_FALSE,                                // VkBool32 anisotropyEnable;
179         1.0f,                                    // float maxAnisotropy;
180         false,                                   // VkBool32 compareEnable;
181         VK_COMPARE_OP_ALWAYS,                    // VkCompareOp compareOp;
182         0.0f,                                    // float minLod;
183         5.0f,                                    // float maxLod;
184         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
185         VK_FALSE,                                // VkBool32 unnormalizedCoordinates;
186     };
187     Move<VkSampler> sampler = createSampler(vk, device, &samplerCreateInfo);
188 
189     const VkVertexInputBindingDescription vertexInputBindingDescription{
190         0u,                          // uint32_t                binding
191         6u * sizeof(float),          // uint32_t                stride
192         VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate    inputRate
193     };
194 
195     const VkVertexInputAttributeDescription vertexInputAttributeDescription[]{
196         {
197             // position: 4 floats
198             0u,                            // uint32_t                location
199             0u,                            // uint32_t                binding
200             VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat                format
201             0u                             // uint32_t                offset
202         },
203         {
204             // uv: 2 floats
205             1u,                      // uint32_t                location
206             0u,                      // uint32_t                binding
207             VK_FORMAT_R32G32_SFLOAT, // VkFormat                format
208             4u * sizeof(float)       // uint32_t                offset
209         }};
210 
211     const VkPipelineVertexInputStateCreateInfo vertexInputState{
212         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType                                sType
213         DE_NULL,                                  // const void*                                    pNext
214         (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags        flags
215         1u,                             // uint32_t                                        vertexBindingDescriptionCount
216         &vertexInputBindingDescription, // const VkVertexInputBindingDescription*        pVertexBindingDescriptions
217         2u, // uint32_t                                        vertexAttributeDescriptionCount
218         vertexInputAttributeDescription // const VkVertexInputAttributeDescription*        pVertexAttributeDescriptions
219     };
220 
221     // create descriptor set
222     DescriptorPoolBuilder poolBuilder;
223     poolBuilder.addType(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
224     const Move<VkDescriptorPool> descriptorPool =
225         poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1);
226 
227     DescriptorSetLayoutBuilder layoutBuilder;
228     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_FRAGMENT_BIT);
229     const Move<VkDescriptorSetLayout> descriptorSetLayout = layoutBuilder.build(vk, device);
230 
231     const Move<VkDescriptorSet> descriptorSet =
232         makeDescriptorSet(vk, device, descriptorPool.get(), descriptorSetLayout.get());
233 
234     // update descriptor set
235     DescriptorSetUpdateBuilder updater;
236     VkDescriptorImageInfo imageInfo =
237         makeDescriptorImageInfo(*sampler, *textureImageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
238     updater.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
239                         VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &imageInfo);
240     updater.update(vk, device);
241 
242     // create shader modules, renderpass, framebuffer and pipeline
243     Move<VkShaderModule> vertShaderModule =
244         createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
245     Move<VkShaderModule> fragShaderModule =
246         createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
247     Move<VkRenderPass> renderPass         = makeRenderPass(vk, device, colorFormat);
248     Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout);
249     Move<VkFramebuffer> framebuffer =
250         makeFramebuffer(vk, device, *renderPass, *colorImageView, m_renderSize.x(), m_renderSize.y());
251     Move<VkPipeline> graphicsPipeline = makeGraphicsPipeline(
252         vk, device, *pipelineLayout, *vertShaderModule, DE_NULL, DE_NULL, DE_NULL, *fragShaderModule, *renderPass,
253         viewports, scissors, m_topology, 0u, 0u, &vertexInputState);
254 
255     Move<VkCommandPool> cmdPool =
256         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
257     vk::Move<vk::VkCommandBuffer> cmdBuffer =
258         allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY);
259 
260     beginCommandBuffer(vk, *cmdBuffer);
261 
262     // transition colorbuffer layout to attachment optimal
263     VkImageMemoryBarrier imageBarrier =
264         makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
265                                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, colorImage.get(), colorSRR);
266     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0u, 0u,
267                           0u, 0u, 0u, 1u, &imageBarrier);
268 
269     // transition texture layout to transfer destination optimal
270     imageBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
271                                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, textureImage.get(), textureSRR);
272     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, 0u, 0u,
273                           0u, 1u, &imageBarrier);
274 
275     // clear texture lod levels to diferent colors
276     VkImageSubresourceRange textureMipSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
277     for (uint32_t mipLevel = 0; mipLevel < mipLevels; ++mipLevel)
278     {
279         textureMipSRR.baseMipLevel = mipLevel;
280         vk.cmdClearColorImage(*cmdBuffer, textureImage.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
281                               &m_mipColors[mipLevel], 1, &textureMipSRR);
282     }
283 
284     // transition texture layout to shader read optimal
285     imageBarrier = makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
286                                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
287                                           VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, textureImage.get(), textureSRR);
288     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0u, 0u, 0u,
289                           0u, 0u, 1u, &imageBarrier);
290 
291     const VkRect2D renderArea = makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y());
292     beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, renderArea, tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
293 
294     const VkDeviceSize vBuffOffset = 0;
295     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
296     vk.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer.get(), &vBuffOffset);
297     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &*descriptorSet, 0u,
298                              nullptr);
299 
300     vk.cmdDraw(*cmdBuffer, (uint32_t)m_vertices.size() / 6u, 1, 0, 0);
301 
302     endRenderPass(vk, *cmdBuffer);
303 
304     // transition colorbuffer layout to transfer source optimal
305     imageBarrier = makeImageMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
306                                           VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
307                                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
308     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
309                           0u, 0u, 0u, 0u, 1u, &imageBarrier);
310 
311     // read back color image
312     const VkImageSubresourceLayers colorSL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
313     const VkBufferImageCopy copyRegion     = makeBufferImageCopy(colorImageExtent, colorSL);
314     vk.cmdCopyImageToBuffer(*cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outputBuffer.get(), 1u,
315                             &copyRegion);
316 
317     endCommandBuffer(vk, *cmdBuffer);
318 
319     VkQueue queue;
320     vk.getDeviceQueue(device, queueFamilyIndex, 0, &queue);
321     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
322 
323     // get output buffer
324     invalidateAlloc(vk, device, outputBuffer.getAllocation());
325     const tcu::TextureFormat resultFormat = mapVkFormat(colorFormat);
326     tcu::ConstPixelBufferAccess outputAccess(resultFormat, m_renderSize.x(), m_renderSize.y(), 1u,
327                                              outputBuffer.getAllocation().getHostPtr());
328 
329     // verify result
330     if (isResultCorrect(outputAccess))
331         return tcu::TestStatus::pass("Pass");
332 
333     m_context.getTestContext().getLog() << tcu::TestLog::Image("Result", "Result", outputAccess);
334 
335     return tcu::TestStatus::fail("Fail");
336 }
337 
338 class QuadDerivativesInstance : public DrawWithQuadControlInstanceBase
339 {
340 public:
341     QuadDerivativesInstance(Context &context, TestMode mode);
342 
343     virtual ~QuadDerivativesInstance(void) = default;
344 
345     virtual bool isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const override;
346 };
347 
QuadDerivativesInstance(Context & context,TestMode mode)348 QuadDerivativesInstance::QuadDerivativesInstance(Context &context, TestMode mode)
349     : DrawWithQuadControlInstanceBase(context, mode)
350 {
351     // create vertex for 5 triangles - defined in order from displayed on the left to the right
352     m_vertices = {
353         // position                        uvCoords
354         0.0f,  1.2f,  0.0f, 1.0f, 0.0f,  0.0f, // uv adjusted to get lod 1
355         -1.2f, -2.0f, 0.0f, 1.0f, 1.0f,  1.0f,  -1.2f, 1.2f,  0.0f, 1.0f, 0.0f, 1.0f,
356 
357         -0.2f, 0.3f,  0.0f, 1.0f, 1.0f,  1.0f, // uv adjusted to get lod 2
358         -0.7f, -0.9f, 0.0f, 1.0f, 0.0f,  0.0f,  -0.3f, -0.8f, 0.0f, 1.0f, 0.0f, 1.0f,
359 
360         0.0f,  0.2f,  0.0f, 1.0f, 10.0f, 10.0f, // uv adjusted to get lod 5
361         0.1f,  -1.0f, 0.0f, 1.0f, 0.0f,  0.0f,  -0.3f, -1.0f, 0.0f, 1.0f, 0.0f, 10.0f,
362 
363         0.2f,  -0.1f, 0.0f, 1.0f, 4.0f,  4.0f, // uv adjusted to get lod 4
364         0.7f,  -1.2f, 0.0f, 1.0f, 0.0f,  0.0f,  0.2f,  -1.8f, 0.0f, 1.0f, 0.0f, 4.0f,
365 
366         -0.1f, 0.5f,  0.0f, 1.0f, 0.0f,  0.0f, // uv adjusted to get lod 3
367         0.8f,  -0.8f, 0.0f, 1.0f, 5.0f,  5.0f,  0.9f,  0.8f,  0.0f, 1.0f, 0.0f, 5.0f,
368     };
369 }
370 
isResultCorrect(const tcu::ConstPixelBufferAccess & outputAccess) const371 bool QuadDerivativesInstance::isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const
372 {
373     const tcu::UVec2 fragmentOnFirstTraingle(3, 8);
374     const uint32_t expectedColorPerFragment[]{0u, 1u, 4u, 3u, 2u};
375     const tcu::Vec4 colorPrecision(0.1f);
376 
377     for (uint32_t triangleIndex = 0u; triangleIndex < 5u; ++triangleIndex)
378     {
379         // on each triangle we are checking fragment that is 6 fragments away from fragment on previous triangle
380         const tcu::UVec2 fragmentOnTraingle(fragmentOnFirstTraingle.x() + 6u * triangleIndex,
381                                             fragmentOnFirstTraingle.y());
382         const uint32_t expectedMipmapIndex(expectedColorPerFragment[triangleIndex]);
383         const tcu::Vec4 expectedColor(m_mipColors[expectedMipmapIndex].float32);
384         tcu::Vec4 fragmentColor = outputAccess.getPixel(fragmentOnTraingle.x(), fragmentOnTraingle.y(), 0);
385 
386         // make sure that fragment has color from proper mipmap level
387         if (tcu::boolAny(tcu::greaterThan(tcu::absDiff(fragmentColor, expectedColor), colorPrecision)))
388             return false;
389     }
390 
391     return true;
392 }
393 
394 class RequireFullQuadsInstance : public DrawWithQuadControlInstanceBase
395 {
396 public:
397     RequireFullQuadsInstance(Context &context, TestMode mode);
398 
399     virtual ~RequireFullQuadsInstance(void) = default;
400 
401     virtual bool isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const override;
402 };
403 
RequireFullQuadsInstance(Context & context,TestMode mode)404 RequireFullQuadsInstance::RequireFullQuadsInstance(Context &context, TestMode mode)
405     : DrawWithQuadControlInstanceBase(context, mode)
406 {
407     // create vertex for 4 conected triangles with an odd angles
408     m_vertices = {
409         // position                        uvCoords
410         -0.9f, 0.6f,  0.0f, 1.0f,  0.0f,  1.0f,  -0.7f, -0.8f, 0.0f,
411         1.0f,  1.0f,  1.0f, -0.2f, 0.9f,  0.0f,  1.0f,  0.0f,  0.0f,
412 
413         0.0f,  0.2f,  0.0f, 1.0f,  20.0f, 20.0f,
414 
415         0.6f,  0.5f,  0.0f, 1.0f,  21.0f, 0.0f,
416 
417         1.2f,  -0.9f, 0.0f, 1.0f,  0.0f,  75.0f,
418     };
419     m_topology   = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
420     m_renderSize = tcu::UVec2(128);
421 }
422 
isResultCorrect(const tcu::ConstPixelBufferAccess & outputAccess) const423 bool RequireFullQuadsInstance::isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const
424 {
425     const float reference(0.9f);
426     uint32_t renderedCount(0);
427     uint32_t properIDsCount(0);
428     uint32_t withHelpersCount(0);
429     uint32_t withoutHelpersCount(0);
430 
431     // ensure at least some shaders have the vote return True and are filled with read color
432     for (uint32_t x = 0u; x < m_renderSize.x(); ++x)
433         for (uint32_t y = 0u; y < m_renderSize.y(); ++y)
434         {
435             tcu::Vec4 pixel = outputAccess.getPixel(x, y, 0);
436             if (pixel.x() < reference)
437                 continue;
438 
439             ++renderedCount;
440 
441             // if blue channel is 1 then quads had proper IDs
442             properIDsCount += uint32_t(pixel.y() > reference);
443 
444             // at least some shaders should have voted True if any helper invocations existed
445             withHelpersCount += uint32_t(pixel.z() > reference);
446 
447             // at least some shaders should have voted True if there were quads without helper invocations
448             withoutHelpersCount += uint32_t(pixel.w() > reference);
449         }
450 
451     return (renderedCount == properIDsCount) && (renderedCount == (withHelpersCount + withoutHelpersCount)) &&
452            (withoutHelpersCount > 50) && (withHelpersCount > 50);
453 }
454 
455 class DivergentConditionInstance : public DrawWithQuadControlInstanceBase
456 {
457 public:
458     DivergentConditionInstance(Context &context, TestMode mode);
459 
460     virtual ~DivergentConditionInstance(void) = default;
461 
462     virtual bool isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const override;
463 };
464 
DivergentConditionInstance(Context & context,TestMode mode)465 DivergentConditionInstance::DivergentConditionInstance(Context &context, TestMode mode)
466     : DrawWithQuadControlInstanceBase(context, mode)
467 {
468     // create vertex for 2 triangles forming full screen quad
469     m_vertices = {
470         // position                        uvCoords
471         -1.0f, 1.0f,  0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f,  0.0f, 1.0f, 1.0f, 1.0f,
472         -1.0f, -1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f, 1.0f, 0.0f,
473     };
474     m_topology   = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
475     m_renderSize = tcu::UVec2(16);
476 }
477 
isResultCorrect(const tcu::ConstPixelBufferAccess & outputAccess) const478 bool DivergentConditionInstance::isResultCorrect(const tcu::ConstPixelBufferAccess &outputAccess) const
479 {
480     bool testPassed(true);
481     const float reference(0.99f);
482     const float lambda(0.01f);
483 
484     auto checkCondition = [](int x, int y) { return (((x % 8) > 4) || (((x % 8) == 2) && bool(y % 2))); };
485 
486     for (uint32_t x = 0u; x < m_renderSize.x(); x += 1u)
487         for (uint32_t y = 0u; y < m_renderSize.y(); y += 1u)
488         {
489             tcu::Vec4 pixel = outputAccess.getPixel(x, y, 0);
490 
491             // if the fragment coord does not meet the first condition then this fragment should have r and g component set to 0
492             if (!checkCondition(x, y))
493             {
494                 if ((pixel.x() > lambda) || (pixel.y() > lambda))
495                     testPassed = false;
496 
497                 // go to next pixel
498                 continue;
499             }
500 
501             uint32_t qx = x - (x % 2);
502             uint32_t qy = y - (y % 2);
503             int sum     = (checkCondition(qx + 0, qy + 0) && checkCondition(qy + 0, qx + 0)) +
504                       (checkCondition(qx + 0, qy + 1) && checkCondition(qy + 1, qx + 0)) +
505                       (checkCondition(qx + 1, qy + 0) && checkCondition(qy + 0, qx + 1)) +
506                       (checkCondition(qx + 1, qy + 1) && checkCondition(qy + 1, qx + 1));
507             int activeCount = checkCondition(qx + 0, qy + 0) + checkCondition(qx + 0, qy + 1) +
508                               checkCondition(qx + 1, qy + 0) + checkCondition(qx + 1, qy + 1);
509 
510             // if none of fragments in the quad meets second condition then this fragment should have r and g components set to 0
511             if (sum == 0)
512             {
513                 if ((pixel.x() > lambda) || (pixel.y() > lambda))
514                     testPassed = false;
515                 continue;
516             }
517 
518             // if all active quad fragments meets second condition then this fragment should have r and g components set to 1
519             if (sum == activeCount)
520             {
521                 if ((pixel.x() < reference) || (pixel.y() < reference))
522                     testPassed = false;
523                 continue;
524             }
525 
526             // if at least one active quad fragment meets second condition then this fragment should have r component set to 1 and g component to 0
527             if ((pixel.x() < reference) || (pixel.y() > lambda))
528                 testPassed = false;
529         }
530 
531     return testPassed;
532 }
533 
534 class DrawWithQuadControlTestCase : public vkt::TestCase
535 {
536 public:
537     DrawWithQuadControlTestCase(tcu::TestContext &testContext, const std::string &name, TestMode mode);
538 
539     virtual ~DrawWithQuadControlTestCase(void) = default;
540 
541     void checkSupport(Context &context) const override;
542     TestInstance *createInstance(Context &context) const override;
543     void initPrograms(SourceCollections &programCollection) const override;
544 
545 protected:
546     const TestMode m_testMode;
547 };
548 
DrawWithQuadControlTestCase(tcu::TestContext & testContext,const std::string & name,TestMode mode)549 DrawWithQuadControlTestCase::DrawWithQuadControlTestCase(tcu::TestContext &testContext, const std::string &name,
550                                                          TestMode mode)
551     : vkt::TestCase(testContext, name)
552     , m_testMode(mode)
553 {
554 }
555 
checkSupport(Context & context) const556 void DrawWithQuadControlTestCase::checkSupport(Context &context) const
557 {
558     context.requireDeviceFunctionality("VK_KHR_shader_quad_control");
559 }
560 
createInstance(Context & context) const561 TestInstance *DrawWithQuadControlTestCase::createInstance(Context &context) const
562 {
563     if (m_testMode == TestMode::QUAD_DERIVATIVES)
564         return new QuadDerivativesInstance(context, m_testMode);
565     if (m_testMode == TestMode::REQUIRE_FULL_QUADS)
566         return new RequireFullQuadsInstance(context, m_testMode);
567 
568     return new DivergentConditionInstance(context, m_testMode);
569 }
570 
initPrograms(SourceCollections & sourceCollections) const571 void DrawWithQuadControlTestCase::initPrograms(SourceCollections &sourceCollections) const
572 {
573     std::string vertexSource("#version 450\n"
574                              "layout(location = 0) in vec4 inPosition;\n"
575                              "layout(location = 1) in vec2 inTexCoords;\n"
576                              "layout(location = 0) out highp vec2 outTexCoords;\n"
577                              "void main(void)\n"
578                              "{\n"
579                              "\tgl_Position = inPosition;\n"
580                              "\toutTexCoords = inTexCoords;\n"
581                              "}\n");
582     sourceCollections.glslSources.add("vert") << glu::VertexSource(vertexSource);
583 
584     std::string fragmentSource;
585     if (m_testMode == TestMode::QUAD_DERIVATIVES)
586     {
587         // we are drawing few triangles and in shader we have a condition
588         // that will be true for exactly one fragment in each triangle
589 
590         fragmentSource = "#version 450\n"
591                          "precision highp float;\n"
592                          "precision highp int;\n"
593                          "#extension GL_EXT_shader_quad_control: enable\n"
594                          "#extension GL_KHR_shader_subgroup_vote: enable\n"
595                          "layout(quad_derivatives) in;\n"
596                          "layout(location = 0) in highp vec2 inTexCoords;\n"
597                          "layout(location = 0) out vec4 outFragColor;\n"
598                          "layout(binding = 0) uniform sampler2D texSampler;\n"
599                          "void main (void)\n"
600                          "{\n"
601                          "\tbool conditionTrueForOneFrag = (abs(gl_FragCoord.y - 8.5) < 0.1) && "
602                          "(mod(gl_FragCoord.x-3.5, 6.0) < 0.1);\n"
603                          "\tif (subgroupQuadAny(conditionTrueForOneFrag))\n"
604                          "\t\toutFragColor = texture(texSampler, inTexCoords);\n"
605                          "\telse\n"
606                          "\t\toutFragColor = vec4(0.9, 0.2, 0.2, 1.0);\n"
607                          "}\n";
608     }
609     else if (m_testMode == TestMode::REQUIRE_FULL_QUADS)
610     {
611         // we are drawing few connected triangles at odd angles
612         // RequireFullQuadsKHR ensures lots of helper lanes
613 
614         fragmentSource = "#version 450\n"
615                          "#extension GL_KHR_shader_subgroup_quad: enable\n"
616                          "#extension GL_KHR_shader_subgroup_vote: enable\n"
617                          "#extension GL_EXT_shader_quad_control: enable\n"
618                          "precision highp float;\n"
619                          "precision highp int;\n"
620                          "layout(full_quads) in;\n"
621                          "layout(location = 0) in highp vec2 inTexCoords;\n"
622                          "layout(location = 0) out vec4 outFragColor;\n"
623                          "layout(binding = 0) uniform sampler2D texSampler;\n"
624                          "void main (void)\n"
625                          "{\n"
626                          "\tuint quadID = gl_SubgroupInvocationID % 4;\n"
627                          "\tuint idSum = quadID;\n"
628                          "\tidSum += subgroupQuadSwapHorizontal(quadID);\n"
629                          "\tidSum += subgroupQuadSwapVertical(quadID);\n"
630                          "\tidSum += subgroupQuadSwapDiagonal(quadID);\n"
631                          "\toutFragColor = vec4(1.0, 0.0, 0.0, 0.0);\n"
632                          "\tif (idSum == 6)\n"
633                          "\t\toutFragColor.g = 1.0;\n"
634                          "\tif (subgroupQuadAny(gl_HelperInvocation))\n"
635                          "\t\toutFragColor.b = 1.0;\n"
636                          "\tif (subgroupQuadAll(!gl_HelperInvocation))\n"
637                          "\t\toutFragColor.a = 1.0;\n"
638                          "}\n";
639     }
640     else // TestMode::DIVERGENT_CONDITION
641     {
642         // draw fullscreen quad and use quadAny/quadAll
643         // inside divergent control flow
644 
645         fragmentSource = "#version 450\n"
646                          "#extension GL_KHR_shader_subgroup_vote: enable\n"
647                          "#extension GL_EXT_shader_quad_control: enable\n"
648                          "precision highp float;\n"
649                          "precision highp int;\n"
650                          "layout(location = 0) out vec4 outFragColor;\n"
651                          "bool checkCondition(int x, int y) {\n"
652                          "\treturn (((x % 8) > 4) || (((x % 8) == 2) && bool(y % 2)));\n"
653                          "}\n"
654                          "void main (void)\n"
655                          "{\n"
656                          "\toutFragColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
657                          "\tint x = int(gl_FragCoord.x);\n"
658                          "\tint y = int(gl_FragCoord.y);\n"
659                          "\tif (checkCondition(x, y))\n"
660                          "\t{\n"
661                          "\t\tbool v = checkCondition(y, x);\n"
662                          "\t\tif (subgroupQuadAny(v))\n"
663                          "\t\t\toutFragColor.r = 1.0;\n"
664                          "\t\tif (subgroupQuadAll(v))\n"
665                          "\t\t\toutFragColor.g = 1.0;\n"
666                          "\t}\n"
667                          "}\n";
668     }
669 
670     const ShaderBuildOptions buildOptions(sourceCollections.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
671     sourceCollections.glslSources.add("frag") << glu::FragmentSource(fragmentSource) << buildOptions;
672 }
673 
createSubgroupsQuadControlTests(tcu::TestContext & testCtx)674 tcu::TestCaseGroup *createSubgroupsQuadControlTests(tcu::TestContext &testCtx)
675 {
676     de::MovePtr<tcu::TestCaseGroup> quadScopeTests(
677         new tcu::TestCaseGroup(testCtx, "shader_quad_control", "Test for VK_KHR_shader_quad_control"));
678 
679     quadScopeTests->addChild(new DrawWithQuadControlTestCase(testCtx, "quad_derivatives", TestMode::QUAD_DERIVATIVES));
680     quadScopeTests->addChild(
681         new DrawWithQuadControlTestCase(testCtx, "require_full_quads", TestMode::REQUIRE_FULL_QUADS));
682     quadScopeTests->addChild(
683         new DrawWithQuadControlTestCase(testCtx, "divergent_condition", TestMode::DIVERGENT_CONDITION));
684 
685     return quadScopeTests.release();
686 }
687 
688 } // namespace subgroups
689 } // namespace vkt
690