1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017-2019 The Khronos Group Inc.
6  * Copyright (c) 2018-2019 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Tests for VK_EXT_fragment_shader_interlock.
23  * These tests render a set of overlapping full-screen quads that use image
24  * or buffer reads and writes to accumulate values into a result image/buffer.
25  * They use fragment shader interlock to avoid race conditions on the read/write
26  * and validate that the final result includes all the writes.
27  * Each fragment shader invocation computes a coordinate, and does a read/modify/write
28  * into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
29  * indicating which primitives or samples have already run through the interlock. e.g.
30  * for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
31  * and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
32  * that all the previous primitives (less significant bits) are also set, else it clobbers the
33  * value. Sample and shading_rate interlock are variants of this where there is one value per
34  * sample or per coarse fragment location, respectively. When there are multiple samples per
35  * fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
36  * primitive order between samples on the internal diagonal of the quad (triangle strip).
37  *//*--------------------------------------------------------------------*/
38 
39 #include "vktFragmentShaderInterlockBasic.hpp"
40 
41 #include "vkBufferWithMemory.hpp"
42 #include "vkImageWithMemory.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkDeviceUtil.hpp"
45 #include "vkBuilderUtil.hpp"
46 #include "vkCmdUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 #include "vkObjUtil.hpp"
49 
50 #include "vktTestGroupUtil.hpp"
51 #include "vktTestCase.hpp"
52 #include "vktCustomInstancesDevices.hpp"
53 
54 #include "deDefs.h"
55 #include "deMath.h"
56 #include "deRandom.h"
57 #include "deSharedPtr.hpp"
58 #include "deString.h"
59 
60 #include "tcuTestCase.hpp"
61 #include "tcuTestLog.hpp"
62 #include "tcuCommandLine.hpp"
63 
64 #include <string>
65 #include <sstream>
66 
67 namespace vkt
68 {
69 namespace FragmentShaderInterlock
70 {
71 namespace
72 {
73 using namespace vk;
74 using namespace std;
75 
// Where the accumulated result bitmask lives.
typedef enum
{
    RES_SSBO = 0, // accumulate into a storage buffer (buf1 in the shader)
    RES_IMAGE,    // accumulate into a r32ui storage image (image0 in the shader)
} Resource;
81 
// Interlock execution mode under test; matches the GLSL layout qualifiers
// (pixel_interlock_*, sample_interlock_*, shading_rate_interlock_*).
typedef enum
{
    INT_PIXEL_ORDERED = 0,
    INT_PIXEL_UNORDERED,
    INT_SAMPLE_ORDERED,
    INT_SAMPLE_UNORDERED,
    INT_SHADING_RATE_ORDERED,
    INT_SHADING_RATE_UNORDERED,
} Interlock;
91 
92 struct CaseDef
93 {
94     uint32_t dim;
95     Resource resType;
96     Interlock interlock;
97     VkSampleCountFlagBits samples;
98     bool killOdd;
99     bool sampleShading;
100 
isSampleInterlockvkt::FragmentShaderInterlock::__anon8db60c050111::CaseDef101     bool isSampleInterlock() const
102     {
103         return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED;
104     }
isOrderedvkt::FragmentShaderInterlock::__anon8db60c050111::CaseDef105     bool isOrdered() const
106     {
107         return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED ||
108                interlock == INT_SHADING_RATE_ORDERED;
109     }
110 };
111 
// Runtime side of a test case: created per execution, runs the case in
// iterate() using the parameters captured in m_data.
class FSITestInstance : public TestInstance
{
public:
    FSITestInstance(Context &context, const CaseDef &data);
    ~FSITestInstance(void);
    tcu::TestStatus iterate(void);

private:
    CaseDef m_data; // immutable case parameters for this run
};
122 
FSITestInstance(Context & context,const CaseDef & data)123 FSITestInstance::FSITestInstance(Context &context, const CaseDef &data) : vkt::TestInstance(context), m_data(data)
124 {
125 }
126 
~FSITestInstance(void)127 FSITestInstance::~FSITestInstance(void)
128 {
129 }
130 
// Build-time side of a test case: registers shaders, checks feature support,
// and creates the FSITestInstance that actually runs the case.
class FSITestCase : public TestCase
{
public:
    FSITestCase(tcu::TestContext &context, const char *name, const CaseDef data);
    ~FSITestCase(void);
    virtual void initPrograms(SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    CaseDef m_data; // immutable case parameters shared with the instance
};
143 
FSITestCase(tcu::TestContext & context,const char * name,const CaseDef data)144 FSITestCase::FSITestCase(tcu::TestContext &context, const char *name, const CaseDef data)
145     : vkt::TestCase(context, name)
146     , m_data(data)
147 {
148 }
149 
~FSITestCase(void)150 FSITestCase::~FSITestCase(void)
151 {
152 }
153 
checkSupport(Context & context) const154 void FSITestCase::checkSupport(Context &context) const
155 {
156     context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock");
157 
158     if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) &&
159         !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock)
160     {
161         TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported");
162     }
163 
164     if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) &&
165         !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock)
166     {
167         TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported");
168     }
169 
170 #ifndef CTS_USES_VULKANSC
171     if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
172         !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock)
173     {
174         TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported");
175     }
176     if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
177         (!context.getFragmentShadingRateFeatures().pipelineFragmentShadingRate ||
178          !context.getFragmentShadingRateProperties().fragmentShadingRateWithFragmentShaderInterlock))
179     {
180         TCU_THROW(NotSupportedError, "fragment shading rate not supported");
181     }
182 #endif // CTS_USES_VULKANSC
183 
184     if (m_data.isSampleInterlock())
185         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SAMPLE_RATE_SHADING);
186 }
187 
bitsPerQuad(const CaseDef & c)188 static int bitsPerQuad(const CaseDef &c)
189 {
190     uint32_t bpq = c.samples;
191 
192     if (c.isSampleInterlock())
193         bpq = 1;
194     else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED)
195         bpq *= 4;
196 
197     return bpq;
198 }
199 
initPrograms(SourceCollections & programCollection) const200 void FSITestCase::initPrograms(SourceCollections &programCollection) const
201 {
202     std::stringstream vss;
203 
204     vss << "#version 450 core\n"
205            "layout(location = 0) out int primID;\n"
206            "void main()\n"
207            "{\n"
208            "  primID = gl_InstanceIndex;\n"
209            // full-viewport quad
210            "  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * "
211            "float(gl_VertexIndex&1), 1);\n"
212            "}\n";
213 
214     programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
215 
216     std::stringstream fss;
217 
218     fss << "#version 450 core\n"
219            "#extension GL_ARB_fragment_shader_interlock : enable\n"
220            "#extension GL_NV_shading_rate_image : enable\n"
221            "layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n"
222            "layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n"
223            "layout(location = 0) flat in int primID;\n";
224 
225     switch (m_data.interlock)
226     {
227     default:
228         DE_ASSERT(0); // fallthrough
229     case INT_PIXEL_ORDERED:
230         fss << "layout(pixel_interlock_ordered) in;\n";
231         break;
232     case INT_PIXEL_UNORDERED:
233         fss << "layout(pixel_interlock_unordered) in;\n";
234         break;
235     case INT_SAMPLE_ORDERED:
236         fss << "layout(sample_interlock_ordered) in;\n";
237         break;
238     case INT_SAMPLE_UNORDERED:
239         fss << "layout(sample_interlock_unordered) in;\n";
240         break;
241     case INT_SHADING_RATE_ORDERED:
242         fss << "layout(shading_rate_interlock_ordered) in;\n";
243         break;
244     case INT_SHADING_RATE_UNORDERED:
245         fss << "layout(shading_rate_interlock_unordered) in;\n";
246         break;
247     }
248 
249     // Each fragment shader invocation computes a coordinate, and does a read/modify/write
250     // into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
251     // indicating which primitives or samples have already run through the interlock. e.g.
252     // for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
253     // and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
254     // that all the previous primitives (less significant bits) are also set, else it clobbers the
255     // value. Sample and shading_rate interlock are variants of this where there is one value per
256     // sample or per coarse fragment location, respectively. When there are multiple samples per
257     // fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
258     // primitive order between samples on the internal diagonal of the quad (triangle strip).
259 
260     fss << "void main()\n"
261            "{\n"
262            "  ivec2 coordxy = ivec2(gl_FragCoord.xy);\n"
263            "  uint stride = "
264         << m_data.dim
265         << ";\n"
266            "  uint bitsPerQuad = "
267         << bitsPerQuad(m_data) << ";\n";
268 
269     // Compute the coordinate
270     if (m_data.isSampleInterlock())
271     {
272         // Spread samples out in the x dimension
273         fss << "  coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n";
274         fss << "  stride *= " << m_data.samples << ";\n";
275     }
276     else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
277     {
278         // shading rate is 2x2. Divide xy by 2
279         fss << "  coordxy /= 2;\n";
280         fss << "  stride /= 2;\n";
281     }
282 
283     if (m_data.isSampleInterlock())
284     {
285         // sample interlock runs per-sample, and stores one bit per sample
286         fss << "  uint mask = 1 << primID;\n";
287         fss << "  uint previousMask = (1 << primID)-1;\n";
288     }
289     else
290     {
291         // pixel and shading_rate interlock run per-fragment, and store the sample mask
292         fss << "  uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n";
293         fss << "  uint previousMask = (1 << (primID * bitsPerQuad))-1;\n";
294     }
295 
296     // Exercise discard before and during the interlock
297     if (m_data.killOdd)
298         fss << "  if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n";
299 
300     fss << "  beginInvocationInterlockARB();\n";
301 
302     if (m_data.killOdd)
303         fss << "  if ((coordxy.x & 1) != 0) discard;\n";
304 
305     // Read the current value from the image or buffer
306     if (m_data.resType == RES_IMAGE)
307         fss << "  uint temp = imageLoad(image0, coordxy).x;\n";
308     else
309     {
310         fss << "  uint coord = coordxy.y * stride + coordxy.x;\n";
311         fss << "  uint temp = buf1.x[coord];\n";
312     }
313 
314     // Update the value. For "ordered" modes, check that all the previous primitives'
315     // bits are already set
316     if (m_data.isOrdered())
317         fss << "  if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n";
318     else
319         fss << "  temp |= mask;\n";
320 
321     // Store out the new value
322     if (m_data.resType == RES_IMAGE)
323         fss << "  imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n";
324     else
325         fss << "  buf1.x[coord] = temp;\n";
326 
327     fss << "  endInvocationInterlockARB();\n";
328 
329     if (m_data.killOdd)
330         fss << "  discard;\n";
331 
332     fss << "}\n";
333 
334     programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str());
335 }
336 
createInstance(Context & context) const337 TestInstance *FSITestCase::createInstance(Context &context) const
338 {
339     return new FSITestInstance(context, m_data);
340 }
341 
iterate(void)342 tcu::TestStatus FSITestInstance::iterate(void)
343 {
344     const DeviceInterface &vk = m_context.getDeviceInterface();
345     const VkDevice device     = m_context.getDevice();
346     Allocator &allocator      = m_context.getDefaultAllocator();
347     VkFlags allShaderStages   = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
348     VkFlags allPipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
349                                 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
350 
351     VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
352 
353     Move<vk::VkDescriptorSetLayout> descriptorSetLayout;
354     Move<vk::VkDescriptorPool> descriptorPool;
355     Move<vk::VkDescriptorSet> descriptorSet;
356 
357     VkDescriptorPoolCreateFlags poolCreateFlags        = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
358     VkDescriptorSetLayoutCreateFlags layoutCreateFlags = 0;
359 
360     const VkDescriptorSetLayoutBinding bindings[2] = {
361         {
362             0u,                               // binding
363             VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // descriptorType
364             1u,                               // descriptorCount
365             allShaderStages,                  // stageFlags
366             DE_NULL,                          // pImmutableSamplers
367         },
368         {
369             1u,                                // binding
370             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType
371             1u,                                // descriptorCount
372             allShaderStages,                   // stageFlags
373             DE_NULL,                           // pImmutableSamplers
374         },
375     };
376 
377     // Create a layout and allocate a descriptor set for it.
378     const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo = {
379         vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
380         DE_NULL,                                                 // pNext
381         layoutCreateFlags,                                       // flags
382         2u,                                                      // bindingCount
383         &bindings[0]                                             // pBindings
384     };
385 
386     descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);
387 
388     vk::DescriptorPoolBuilder poolBuilder;
389     poolBuilder.addType(bindings[0].descriptorType, 1);
390     poolBuilder.addType(bindings[1].descriptorType, 1);
391 
392     descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
393     descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
394 
395     // one uint per sample (max of 4 samples)
396     VkDeviceSize bufferSize = m_data.dim * m_data.dim * sizeof(uint32_t) * 4;
397 
398     de::MovePtr<BufferWithMemory> buffer;
399     buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
400         vk, device, allocator,
401         makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
402                                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
403         MemoryRequirement::HostVisible));
404 
405     flushAlloc(vk, device, buffer->getAllocation());
406 
407     const VkQueue queue             = getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0);
408     Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
409     Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
410 
411     beginCommandBuffer(vk, *cmdBuffer, 0u);
412 
413     const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
414         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
415         DE_NULL,                                       // pNext
416         (VkPipelineLayoutCreateFlags)0,
417         1,                          // setLayoutCount
418         &descriptorSetLayout.get(), // pSetLayouts
419         0u,                         // pushConstantRangeCount
420         DE_NULL,                    // pPushConstantRanges
421     };
422 
423     Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
424 
425     de::MovePtr<BufferWithMemory> copyBuffer;
426     copyBuffer = de::MovePtr<BufferWithMemory>(
427         new BufferWithMemory(vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
428                              MemoryRequirement::HostVisible | MemoryRequirement::Cached));
429 
430     const VkImageCreateInfo imageCreateInfo = {
431         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
432         DE_NULL,                             // const void* pNext;
433         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
434         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
435         VK_FORMAT_R32_UINT,                  // VkFormat format;
436         {
437             m_data.dim * m_data.samples, // uint32_t width;
438             m_data.dim,                  // uint32_t height;
439             1u                           // uint32_t depth;
440         },                               // VkExtent3D extent;
441         1u,                              // uint32_t mipLevels;
442         1u,                              // uint32_t arrayLayers;
443         VK_SAMPLE_COUNT_1_BIT,           // VkSampleCountFlagBits samples;
444         VK_IMAGE_TILING_OPTIMAL,         // VkImageTiling tiling;
445         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
446             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
447         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
448         0u,                                  // uint32_t queueFamilyIndexCount;
449         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
450         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
451     };
452 
453     VkImageViewCreateInfo imageViewCreateInfo = {
454         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
455         DE_NULL,                                  // const void* pNext;
456         (VkImageViewCreateFlags)0u,               // VkImageViewCreateFlags flags;
457         DE_NULL,                                  // VkImage image;
458         VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
459         VK_FORMAT_R32_UINT,                       // VkFormat format;
460         {
461             VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
462             VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
463             VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
464             VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
465         },                          // VkComponentMapping  components;
466         {
467             VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
468             0u,                        // uint32_t baseMipLevel;
469             1u,                        // uint32_t levelCount;
470             0u,                        // uint32_t baseArrayLayer;
471             1u                         // uint32_t layerCount;
472         }                              // VkImageSubresourceRange subresourceRange;
473     };
474 
475     de::MovePtr<ImageWithMemory> image;
476     Move<VkImageView> imageView;
477 
478     image = de::MovePtr<ImageWithMemory>(
479         new ImageWithMemory(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
480     imageViewCreateInfo.image = **image;
481     imageView                 = createImageView(vk, device, &imageViewCreateInfo, NULL);
482 
483     VkDescriptorImageInfo imageInfo   = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
484     VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffer, 0, bufferSize);
485 
486     VkWriteDescriptorSet w = {
487         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
488         DE_NULL,                                // pNext
489         *descriptorSet,                         // dstSet
490         (uint32_t)0,                            // dstBinding
491         0,                                      // dstArrayElement
492         1u,                                     // descriptorCount
493         bindings[0].descriptorType,             // descriptorType
494         &imageInfo,                             // pImageInfo
495         &bufferInfo,                            // pBufferInfo
496         DE_NULL,                                // pTexelBufferView
497     };
498     vk.updateDescriptorSets(device, 1, &w, 0, NULL);
499 
500     w.dstBinding     = 1;
501     w.descriptorType = bindings[1].descriptorType;
502     vk.updateDescriptorSets(device, 1, &w, 0, NULL);
503 
504     vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
505 
506     VkBool32 shadingRateEnable =
507         m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED ? VK_TRUE :
508                                                                                                          VK_FALSE;
509 
510     Move<VkPipeline> pipeline;
511     Move<VkRenderPass> renderPass;
512     Move<VkFramebuffer> framebuffer;
513 
514     {
515         const vk::VkSubpassDescription subpassDesc = {
516             (vk::VkSubpassDescriptionFlags)0,
517             vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
518             0u,                                  // inputCount
519             DE_NULL,                             // pInputAttachments
520             0u,                                  // colorCount
521             DE_NULL,                             // pColorAttachments
522             DE_NULL,                             // pResolveAttachments
523             DE_NULL,                             // depthStencilAttachment
524             0u,                                  // preserveCount
525             DE_NULL,                             // pPreserveAttachments
526         };
527         const vk::VkRenderPassCreateInfo renderPassParams = {
528             vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
529             DE_NULL,                                       // pNext
530             (vk::VkRenderPassCreateFlags)0,
531             0u,           // attachmentCount
532             DE_NULL,      // pAttachments
533             1u,           // subpassCount
534             &subpassDesc, // pSubpasses
535             0u,           // dependencyCount
536             DE_NULL,      // pDependencies
537         };
538 
539         renderPass = createRenderPass(vk, device, &renderPassParams);
540 
541         const vk::VkFramebufferCreateInfo framebufferParams = {
542             vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
543             DE_NULL,                                       // pNext
544             (vk::VkFramebufferCreateFlags)0,
545             *renderPass, // renderPass
546             0u,          // attachmentCount
547             DE_NULL,     // pAttachments
548             m_data.dim,  // width
549             m_data.dim,  // height
550             1u,          // layers
551         };
552 
553         framebuffer = createFramebuffer(vk, device, &framebufferParams);
554 
555         const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
556             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
557             DE_NULL,                                                   // const void* pNext;
558             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
559             0u,                                                        // uint32_t vertexBindingDescriptionCount;
560             DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
561             0u,      // uint32_t vertexAttributeDescriptionCount;
562             DE_NULL  // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
563         };
564 
565         const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
566             VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
567             DE_NULL,                                                     // const void* pNext;
568             (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
569             VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,       // VkPrimitiveTopology topology;
570             VK_FALSE                                    // VkBool32 primitiveRestartEnable;
571         };
572 
573         const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
574             VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
575             DE_NULL,                                                    // const void* pNext;
576             (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
577             VK_FALSE,                                   // VkBool32 depthClampEnable;
578             VK_FALSE,                                   // VkBool32 rasterizerDiscardEnable;
579             VK_POLYGON_MODE_FILL,                       // VkPolygonMode polygonMode;
580             VK_CULL_MODE_NONE,                          // VkCullModeFlags cullMode;
581             VK_FRONT_FACE_CLOCKWISE,                    // VkFrontFace frontFace;
582             VK_FALSE,                                   // VkBool32 depthBiasEnable;
583             0.0f,                                       // float depthBiasConstantFactor;
584             0.0f,                                       // float depthBiasClamp;
585             0.0f,                                       // float depthBiasSlopeFactor;
586             1.0f                                        // float lineWidth;
587         };
588 
589         const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
590             VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                            sType
591             DE_NULL,                               // const void*                                pNext
592             0u,                                    // VkPipelineMultisampleStateCreateFlags    flags
593             (VkSampleCountFlagBits)m_data.samples, // VkSampleCountFlagBits                    rasterizationSamples
594             m_data.sampleShading ? VK_TRUE :
595                                    VK_FALSE, // VkBool32                                    sampleShadingEnable
596             1.0f,                            // float                                    minSampleShading
597             DE_NULL,                         // const VkSampleMask*                        pSampleMask
598             VK_FALSE,                        // VkBool32                                    alphaToCoverageEnable
599             VK_FALSE                         // VkBool32                                    alphaToOneEnable
600         };
601 
602         VkViewport viewport = makeViewport(m_data.dim, m_data.dim);
603         VkRect2D scissor    = makeRect2D(m_data.dim, m_data.dim);
604 
605         VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo = {
606             VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR, // VkStructureType sType;
607             DE_NULL,                                                                // const void* pNext;
608             {2, 2},                                                                 // VkExtent2D fragmentSize;
609             {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
610              VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, // VkFragmentShadingRateCombinerOpKHR combinerOps[2];
611         };
612 
613         const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
614             VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                            sType
615             DE_NULL,                                               // const void*                                pNext
616             (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags        flags
617             1u,        // uint32_t                                    viewportCount
618             &viewport, // const VkViewport*                        pViewports
619             1u,        // uint32_t                                    scissorCount
620             &scissor   // const VkRect2D*                            pScissors
621         };
622 
623         Move<VkShaderModule> fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
624         Move<VkShaderModule> vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
625         uint32_t numStages      = 2u;
626 
627         const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
628             {
629                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
630                 VK_SHADER_STAGE_VERTEX_BIT, // stage
631                 *vs,                        // shader
632                 "main",
633                 DE_NULL, // pSpecializationInfo
634             },
635             {
636                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
637                 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
638                 *fs,                          // shader
639                 "main",
640                 DE_NULL, // pSpecializationInfo
641             }};
642 
643         const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
644             VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,           // VkStructureType sType;
645             shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL, // const void* pNext;
646             (VkPipelineCreateFlags)0,                                  // VkPipelineCreateFlags flags;
647             numStages,                                                 // uint32_t stageCount;
648             &shaderCreateInfo[0],          // const VkPipelineShaderStageCreateInfo* pStages;
649             &vertexInputStateCreateInfo,   // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
650             &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
651             DE_NULL,                       // const VkPipelineTessellationStateCreateInfo* pTessellationState;
652             &viewportStateCreateInfo,      // const VkPipelineViewportStateCreateInfo* pViewportState;
653             &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
654             &multisampleStateCreateInfo,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
655             DE_NULL,                       // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
656             DE_NULL,                       // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
657             DE_NULL,                       // const VkPipelineDynamicStateCreateInfo* pDynamicState;
658             pipelineLayout.get(),          // VkPipelineLayout layout;
659             renderPass.get(),              // VkRenderPass renderPass;
660             0u,                            // uint32_t subpass;
661             DE_NULL,                       // VkPipeline basePipelineHandle;
662             0                              // int basePipelineIndex;
663         };
664 
665         pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
666     }
667 
668     const VkImageMemoryBarrier imageBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType        sType
669                                                DE_NULL,                                // const void*            pNext
670                                                0u,                           // VkAccessFlags        srcAccessMask
671                                                VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags        dstAccessMask
672                                                VK_IMAGE_LAYOUT_UNDEFINED,    // VkImageLayout        oldLayout
673                                                VK_IMAGE_LAYOUT_GENERAL,      // VkImageLayout        newLayout
674                                                VK_QUEUE_FAMILY_IGNORED, // uint32_t                srcQueueFamilyIndex
675                                                VK_QUEUE_FAMILY_IGNORED, // uint32_t                dstQueueFamilyIndex
676                                                **image,                 // VkImage                image
677                                                {
678                                                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags    aspectMask
679                                                    0u,                        // uint32_t                baseMipLevel
680                                                    1u,                        // uint32_t                mipLevels,
681                                                    0u,                        // uint32_t                baseArray
682                                                    1u,                        // uint32_t                arraySize
683                                                }};
684 
685     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
686                           (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
687                           (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrier);
688 
689     vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
690 
691     VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
692     VkClearValue clearColor       = makeClearValueColorU32(0, 0, 0, 0);
693 
694     VkMemoryBarrier memBarrier = {
695         VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
696         DE_NULL,                          // pNext
697         0u,                               // srcAccessMask
698         0u,                               // dstAccessMask
699     };
700 
701     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
702 
703     vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0);
704 
705     memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
706     memBarrier.dstAccessMask =
707         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
708     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, 0, 1, &memBarrier, 0, DE_NULL,
709                           0, DE_NULL);
710 
711     beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(m_data.dim, m_data.dim), 0, DE_NULL,
712                     VK_SUBPASS_CONTENTS_INLINE);
713 
714     // Draw N fullscreen "quads", one per instance.
715     uint32_t N             = 32 / bitsPerQuad(m_data);
716     uint32_t expectedValue = 0xFFFFFFFF;
717     vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u);
718 
719     endRenderPass(vk, *cmdBuffer);
720 
721     memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
722     memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
723     vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memBarrier, 0, DE_NULL,
724                           0, DE_NULL);
725 
726     uint32_t copyDimX = m_data.dim;
727     uint32_t copyDimY = m_data.dim;
728 
729     if (m_data.isSampleInterlock())
730         copyDimX *= m_data.samples;
731 
732     if (shadingRateEnable)
733     {
734         copyDimX /= 2;
735         copyDimY /= 2;
736     }
737 
738     if (m_data.resType == RES_IMAGE)
739     {
740         const VkBufferImageCopy copyRegion = makeBufferImageCopy(
741             makeExtent3D(copyDimX, copyDimY, 1u), makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
742         vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
743     }
744     else
745     {
746         const VkBufferCopy copyRegion = makeBufferCopy(0u, 0u, copyDimX * copyDimY * sizeof(uint32_t));
747         vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, &copyRegion);
748     }
749 
750     memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
751     memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
752     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memBarrier, 0,
753                           DE_NULL, 0, DE_NULL);
754 
755     endCommandBuffer(vk, *cmdBuffer);
756 
757     submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
758 
759     uint32_t *ptr = (uint32_t *)copyBuffer->getAllocation().getHostPtr();
760     invalidateAlloc(vk, device, copyBuffer->getAllocation());
761 
762     qpTestResult res = QP_TEST_RESULT_PASS;
763 
764     for (uint32_t i = 0; i < copyDimX * copyDimY; ++i)
765     {
766         if (m_data.killOdd && (i & 1))
767         {
768             if (ptr[i] != 0)
769                 res = QP_TEST_RESULT_FAIL;
770         }
771         else if (ptr[i] != expectedValue)
772             res = QP_TEST_RESULT_FAIL;
773     }
774 
775     return tcu::TestStatus(res, qpGetTestResultName(res));
776 }
777 
778 } // namespace
779 
createBasicTests(tcu::TestContext & testCtx)780 tcu::TestCaseGroup *createBasicTests(tcu::TestContext &testCtx)
781 {
782     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic"));
783 
784     typedef struct
785     {
786         uint32_t count;
787         const char *name;
788     } TestGroupCase;
789 
790     TestGroupCase dimCases[] = {
791         {8, "8x8"},       {16, "16x16"},    {32, "32x32"},    {64, "64x64"},
792         {128, "128x128"}, {256, "256x256"}, {512, "512x512"}, {1024, "1024x1024"},
793     };
794 
795     TestGroupCase resCases[] = {
796         {RES_IMAGE, "image"},
797         {RES_SSBO, "ssbo"},
798     };
799 
800     TestGroupCase killCases[] = {
801         {0, "nodiscard"},
802         {1, "discard"},
803     };
804 
805     TestGroupCase sampCases[] = {
806         {1, "1xaa"},
807         {4, "4xaa"},
808     };
809 
810     TestGroupCase ssCases[] = {
811         {0, "no_sample_shading"},
812         {1, "sample_shading"},
813     };
814 
815     TestGroupCase intCases[] = {
816         {INT_PIXEL_ORDERED, "pixel_ordered"},
817         {INT_PIXEL_UNORDERED, "pixel_unordered"},
818         {INT_SAMPLE_ORDERED, "sample_ordered"},
819         {INT_SAMPLE_UNORDERED, "sample_unordered"},
820 #ifndef CTS_USES_VULKANSC
821         {INT_SHADING_RATE_ORDERED, "shading_rate_ordered"},
822         {INT_SHADING_RATE_UNORDERED, "shading_rate_unordered"},
823 #endif // CTS_USES_VULKANSC
824     };
825 
826     for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++)
827     {
828         de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name));
829         for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++)
830         {
831             de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name));
832             for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++)
833             {
834                 de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name));
835                 for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++)
836                 {
837                     de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name));
838                     for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++)
839                     {
840                         de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name));
841                         for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++)
842                         {
843                             CaseDef c = {
844                                 dimCases[dimNdx].count,                          // uint32_t set;
845                                 (Resource)resCases[resNdx].count,                // Resource resType;
846                                 (Interlock)intCases[intNdx].count,               // Interlock interlock;
847                                 (VkSampleCountFlagBits)sampCases[sampNdx].count, // VkSampleCountFlagBits samples;
848                                 (bool)killCases[killNdx].count,                  // bool killOdd;
849                                 (bool)ssCases[ssNdx].count,                      // bool sampleShading;
850                             };
851 
852                             if (c.sampleShading && c.samples == 1)
853                                 continue;
854 
855                             ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, c));
856                         }
857                         sampGroup->addChild(ssGroup.release());
858                     }
859                     intGroup->addChild(sampGroup.release());
860                 }
861                 resGroup->addChild(intGroup.release());
862             }
863             killGroup->addChild(resGroup.release());
864         }
865         group->addChild(killGroup.release());
866     }
867     return group.release();
868 }
869 
870 } // namespace FragmentShaderInterlock
871 } // namespace vkt
872