/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_fragment_shader_interlock.
 * These tests render a set of overlapping full-screen quads that use image
 * or buffer reads and writes to accumulate values into a result image/buffer.
 * They use fragment shader interlock to avoid race conditions on the read/write
 * and validate that the final result includes all the writes.
 * Each fragment shader invocation computes a coordinate, and does a read/modify/write
 * into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
 * indicating which primitives or samples have already run through the interlock. e.g.
 * for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
 * and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
 * that all the previous primitives (less significant bits) are also set, else it clobbers the
 * value. Sample and shading_rate interlock are variants of this where there is one value per
 * sample or per coarse fragment location, respectively. When there are multiple samples per
 * fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
 * primitive order between samples on the internal diagonal of the quad (triangle strip).
 *//*--------------------------------------------------------------------*/
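/*
 * For orientation, the fragment shader generated by initPrograms() below looks
 * roughly like the following sketch for the simplest configuration (single
 * sample, pixel_interlock_ordered, SSBO result, no discard); the image path and
 * the sample/shading-rate variants only change the coordinate and mask setup:
 *
 *     #version 450 core
 *     #extension GL_ARB_fragment_shader_interlock : enable
 *     layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;
 *     layout(location = 0) flat in int primID;
 *     layout(pixel_interlock_ordered) in;
 *     void main()
 *     {
 *         ivec2 coordxy      = ivec2(gl_FragCoord.xy);
 *         uint  stride       = dim;                 // framebuffer width, baked in by initPrograms()
 *         uint  mask         = 1u << primID;        // this primitive's bit
 *         uint  previousMask = (1u << primID) - 1u; // bits of all earlier primitives
 *         beginInvocationInterlockARB();
 *         uint coord = coordxy.y * stride + coordxy.x;
 *         uint temp  = buf1.x[coord];
 *         // "ordered": all earlier primitives must already have written their bits
 *         if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;
 *         buf1.x[coord] = temp;
 *         endInvocationInterlockARB();
 *     }
 */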

#include "vktFragmentShaderInterlockBasic.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"
#include "vktCustomInstancesDevices.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace FragmentShaderInterlock
{
namespace
{
using namespace vk;
using namespace std;

typedef enum
{
    RES_SSBO = 0,
    RES_IMAGE,
} Resource;

typedef enum
{
    INT_PIXEL_ORDERED = 0,
    INT_PIXEL_UNORDERED,
    INT_SAMPLE_ORDERED,
    INT_SAMPLE_UNORDERED,
    INT_SHADING_RATE_ORDERED,
    INT_SHADING_RATE_UNORDERED,
} Interlock;

struct CaseDef
{
    uint32_t dim;
    Resource resType;
    Interlock interlock;
    VkSampleCountFlagBits samples;
    bool killOdd;
    bool sampleShading;

    bool isSampleInterlock() const
    {
        return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED;
    }
    bool isOrdered() const
    {
        return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED ||
               interlock == INT_SHADING_RATE_ORDERED;
    }
};

class FSITestInstance : public TestInstance
{
public:
    FSITestInstance(Context &context, const CaseDef &data);
    ~FSITestInstance(void);
    tcu::TestStatus iterate(void);

private:
    CaseDef m_data;
};

FSITestInstance::FSITestInstance(Context &context, const CaseDef &data) : vkt::TestInstance(context), m_data(data)
{
}

FSITestInstance::~FSITestInstance(void)
{
}

class FSITestCase : public TestCase
{
public:
    FSITestCase(tcu::TestContext &context, const char *name, const CaseDef data);
    ~FSITestCase(void);
    virtual void initPrograms(SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    CaseDef m_data;
};

FSITestCase::FSITestCase(tcu::TestContext &context, const char *name, const CaseDef data)
    : vkt::TestCase(context, name)
    , m_data(data)
{
}

FSITestCase::~FSITestCase(void)
{
}

void FSITestCase::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock");

    if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) &&
        !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock)
    {
        TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported");
    }

    if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) &&
        !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock)
    {
        TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported");
    }

#ifndef CTS_USES_VULKANSC
    if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
        !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock)
    {
        TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported");
    }
    if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
        (!context.getFragmentShadingRateFeatures().pipelineFragmentShadingRate ||
         !context.getFragmentShadingRateProperties().fragmentShadingRateWithFragmentShaderInterlock))
    {
        TCU_THROW(NotSupportedError, "fragment shading rate not supported");
    }
#endif // CTS_USES_VULKANSC

    if (m_data.isSampleInterlock())
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SAMPLE_RATE_SHADING);
}

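// Number of result bits each instanced quad contributes to a 32-bit word:
// one bit per invocation for sample interlock (the word is per-sample), the
// whole sample mask for pixel interlock, and samples * 4 for shading rate
// interlock (a 2x2 coarse fragment covers four pixels). For example, with
// 4xAA and pixel interlock each quad contributes 4 bits, so 32 / 4 = 8
// instances are drawn to fill every word with 0xFFFFFFFF.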
static int bitsPerQuad(const CaseDef &c)
{
    uint32_t bpq = c.samples;

    if (c.isSampleInterlock())
        bpq = 1;
    else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED)
        bpq *= 4;

    return bpq;
}

void FSITestCase::initPrograms(SourceCollections &programCollection) const
{
    std::stringstream vss;

    vss << "#version 450 core\n"
           "layout(location = 0) out int primID;\n"
           "void main()\n"
           "{\n"
           " primID = gl_InstanceIndex;\n"
           // full-viewport quad
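           // (a 4-vertex triangle strip spanning (-1,-1)..(3,3) in clip space,
           //  so every instance fully covers the viewport)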
           " gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * "
           "float(gl_VertexIndex&1), 1);\n"
           "}\n";

    programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

    std::stringstream fss;

    fss << "#version 450 core\n"
           "#extension GL_ARB_fragment_shader_interlock : enable\n"
           "#extension GL_NV_shading_rate_image : enable\n"
           "layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n"
           "layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n"
           "layout(location = 0) flat in int primID;\n";

    switch (m_data.interlock)
    {
    default:
        DE_ASSERT(0); // fallthrough
    case INT_PIXEL_ORDERED:
        fss << "layout(pixel_interlock_ordered) in;\n";
        break;
    case INT_PIXEL_UNORDERED:
        fss << "layout(pixel_interlock_unordered) in;\n";
        break;
    case INT_SAMPLE_ORDERED:
        fss << "layout(sample_interlock_ordered) in;\n";
        break;
    case INT_SAMPLE_UNORDERED:
        fss << "layout(sample_interlock_unordered) in;\n";
        break;
    case INT_SHADING_RATE_ORDERED:
        fss << "layout(shading_rate_interlock_ordered) in;\n";
        break;
    case INT_SHADING_RATE_UNORDERED:
        fss << "layout(shading_rate_interlock_unordered) in;\n";
        break;
    }

    // Each fragment shader invocation computes a coordinate, and does a read/modify/write
    // into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
    // indicating which primitives or samples have already run through the interlock. e.g.
    // for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
    // and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
    // that all the previous primitives (less significant bits) are also set, else it clobbers the
    // value. Sample and shading_rate interlock are variants of this where there is one value per
    // sample or per coarse fragment location, respectively. When there are multiple samples per
    // fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
    // primitive order between samples on the internal diagonal of the quad (triangle strip).

    fss << "void main()\n"
           "{\n"
           " ivec2 coordxy = ivec2(gl_FragCoord.xy);\n"
           " uint stride = "
        << m_data.dim
        << ";\n"
           " uint bitsPerQuad = "
        << bitsPerQuad(m_data) << ";\n";

    // Compute the coordinate
    if (m_data.isSampleInterlock())
    {
        // Spread samples out in the x dimension
        fss << " coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n";
        fss << " stride *= " << m_data.samples << ";\n";
    }
    else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
    {
        // shading rate is 2x2. Divide xy by 2
        fss << " coordxy /= 2;\n";
        fss << " stride /= 2;\n";
    }

    if (m_data.isSampleInterlock())
    {
        // sample interlock runs per-sample, and stores one bit per sample
        fss << " uint mask = 1 << primID;\n";
        fss << " uint previousMask = (1 << primID)-1;\n";
    }
    else
    {
        // pixel and shading_rate interlock run per-fragment, and store the sample mask
        fss << " uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n";
        fss << " uint previousMask = (1 << (primID * bitsPerQuad))-1;\n";
    }

    // Exercise discard before and during the interlock
    if (m_data.killOdd)
        fss << " if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n";

    fss << " beginInvocationInterlockARB();\n";

    if (m_data.killOdd)
        fss << " if ((coordxy.x & 1) != 0) discard;\n";

    // Read the current value from the image or buffer
    if (m_data.resType == RES_IMAGE)
        fss << " uint temp = imageLoad(image0, coordxy).x;\n";
    else
    {
        fss << " uint coord = coordxy.y * stride + coordxy.x;\n";
        fss << " uint temp = buf1.x[coord];\n";
    }

    // Update the value. For "ordered" modes, check that all the previous primitives'
    // bits are already set
    if (m_data.isOrdered())
        fss << " if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n";
    else
        fss << " temp |= mask;\n";

    // Store out the new value
    if (m_data.resType == RES_IMAGE)
        fss << " imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n";
    else
        fss << " buf1.x[coord] = temp;\n";

    fss << " endInvocationInterlockARB();\n";

    if (m_data.killOdd)
        fss << " discard;\n";

    fss << "}\n";

    programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str());
}

TestInstance *FSITestCase::createInstance(Context &context) const
{
    return new FSITestInstance(context, m_data);
}

tcu::TestStatus FSITestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    Allocator &allocator = m_context.getDefaultAllocator();
    VkFlags allShaderStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    VkFlags allPipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;

    Move<vk::VkDescriptorSetLayout> descriptorSetLayout;
    Move<vk::VkDescriptorPool> descriptorPool;
    Move<vk::VkDescriptorSet> descriptorSet;

    VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
    VkDescriptorSetLayoutCreateFlags layoutCreateFlags = 0;

    const VkDescriptorSetLayoutBinding bindings[2] = {
        {
            0u, // binding
            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // descriptorType
            1u, // descriptorCount
            allShaderStages, // stageFlags
            DE_NULL, // pImmutableSamplers
        },
        {
            1u, // binding
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType
            1u, // descriptorCount
            allShaderStages, // stageFlags
            DE_NULL, // pImmutableSamplers
        },
    };

    // Create a layout and allocate a descriptor set for it.
    const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo = {
        vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
        DE_NULL, // pNext
        layoutCreateFlags, // flags
        2u, // bindingCount
        &bindings[0] // pBindings
    };

    descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

    vk::DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(bindings[0].descriptorType, 1);
    poolBuilder.addType(bindings[1].descriptorType, 1);

    descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
    descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

    // one uint per sample (max of 4 samples)
    VkDeviceSize bufferSize = m_data.dim * m_data.dim * sizeof(uint32_t) * 4;

    de::MovePtr<BufferWithMemory> buffer;
    buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator,
        makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                             VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        MemoryRequirement::HostVisible));

    flushAlloc(vk, device, buffer->getAllocation());

    const VkQueue queue = getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0);
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    beginCommandBuffer(vk, *cmdBuffer, 0u);

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
        DE_NULL, // pNext
        (VkPipelineLayoutCreateFlags)0,
        1, // setLayoutCount
        &descriptorSetLayout.get(), // pSetLayouts
        0u, // pushConstantRangeCount
        DE_NULL, // pPushConstantRanges
    };

    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

    de::MovePtr<BufferWithMemory> copyBuffer;
    copyBuffer = de::MovePtr<BufferWithMemory>(
        new BufferWithMemory(vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
                             MemoryRequirement::HostVisible | MemoryRequirement::Cached));

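    // The result image is allocated dim * samples texels wide so the sample-interlock
    // cases can spread samples along x (matching the "coordxy.x * samples + gl_SampleID"
    // transform in the shader); the other cases only use the leftmost dim columns.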
    const VkImageCreateInfo imageCreateInfo = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D, // VkImageType imageType;
        VK_FORMAT_R32_UINT, // VkFormat format;
        {
            m_data.dim * m_data.samples, // uint32_t width;
            m_data.dim, // uint32_t height;
            1u // uint32_t depth;
        }, // VkExtent3D extent;
        1u, // uint32_t mipLevels;
        1u, // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
            VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
        0u, // uint32_t queueFamilyIndexCount;
        DE_NULL, // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
    };

    VkImageViewCreateInfo imageViewCreateInfo = {
        VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
        DE_NULL, // VkImage image;
        VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
        VK_FORMAT_R32_UINT, // VkFormat format;
        {
            VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
            VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
            VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
            VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
        }, // VkComponentMapping components;
        {
            VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
            0u, // uint32_t baseMipLevel;
            1u, // uint32_t levelCount;
            0u, // uint32_t baseArrayLayer;
            1u // uint32_t layerCount;
        } // VkImageSubresourceRange subresourceRange;
    };

    de::MovePtr<ImageWithMemory> image;
    Move<VkImageView> imageView;

    image = de::MovePtr<ImageWithMemory>(
        new ImageWithMemory(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
    imageViewCreateInfo.image = **image;
    imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

    VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffer, 0, bufferSize);

    VkWriteDescriptorSet w = {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
        DE_NULL, // pNext
        *descriptorSet, // dstSet
        (uint32_t)0, // dstBinding
        0, // dstArrayElement
        1u, // descriptorCount
        bindings[0].descriptorType, // descriptorType
        &imageInfo, // pImageInfo
        &bufferInfo, // pBufferInfo
        DE_NULL, // pTexelBufferView
    };
    vk.updateDescriptorSets(device, 1, &w, 0, NULL);

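    // Reuse the same VkWriteDescriptorSet for the storage buffer at binding 1;
    // for each write, only the info pointer matching the descriptor type is consumed.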
    w.dstBinding = 1;
    w.descriptorType = bindings[1].descriptorType;
    vk.updateDescriptorSets(device, 1, &w, 0, NULL);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);

    VkBool32 shadingRateEnable =
        m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED ? VK_TRUE :
                                                                                                          VK_FALSE;

    Move<VkPipeline> pipeline;
    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;

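    // The render pass and framebuffer have no attachments: every result is written
    // through the storage image / SSBO bound above, so the framebuffer only supplies
    // the render area dimensions.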
    {
        const vk::VkSubpassDescription subpassDesc = {
            (vk::VkSubpassDescriptionFlags)0,
            vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
            0u, // inputCount
            DE_NULL, // pInputAttachments
            0u, // colorCount
            DE_NULL, // pColorAttachments
            DE_NULL, // pResolveAttachments
            DE_NULL, // depthStencilAttachment
            0u, // preserveCount
            DE_NULL, // pPreserveAttachments
        };
        const vk::VkRenderPassCreateInfo renderPassParams = {
            vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
            DE_NULL, // pNext
            (vk::VkRenderPassCreateFlags)0,
            0u, // attachmentCount
            DE_NULL, // pAttachments
            1u, // subpassCount
            &subpassDesc, // pSubpasses
            0u, // dependencyCount
            DE_NULL, // pDependencies
        };

        renderPass = createRenderPass(vk, device, &renderPassParams);

        const vk::VkFramebufferCreateInfo framebufferParams = {
            vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
            DE_NULL, // pNext
            (vk::VkFramebufferCreateFlags)0,
            *renderPass, // renderPass
            0u, // attachmentCount
            DE_NULL, // pAttachments
            m_data.dim, // width
            m_data.dim, // height
            1u, // layers
        };

        framebuffer = createFramebuffer(vk, device, &framebufferParams);

        const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
            0u, // uint32_t vertexBindingDescriptionCount;
            DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
            0u, // uint32_t vertexAttributeDescriptionCount;
            DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
        };

        const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
            VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
            VK_FALSE // VkBool32 primitiveRestartEnable;
        };

        const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
            VK_FALSE, // VkBool32 depthClampEnable;
            VK_FALSE, // VkBool32 rasterizerDiscardEnable;
            VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
            VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
            VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
            VK_FALSE, // VkBool32 depthBiasEnable;
            0.0f, // float depthBiasConstantFactor;
            0.0f, // float depthBiasClamp;
            0.0f, // float depthBiasSlopeFactor;
            1.0f // float lineWidth;
        };

        const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
            DE_NULL, // const void* pNext
            0u, // VkPipelineMultisampleStateCreateFlags flags
            (VkSampleCountFlagBits)m_data.samples, // VkSampleCountFlagBits rasterizationSamples
            m_data.sampleShading ? VK_TRUE : VK_FALSE, // VkBool32 sampleShadingEnable
            1.0f, // float minSampleShading
            DE_NULL, // const VkSampleMask* pSampleMask
            VK_FALSE, // VkBool32 alphaToCoverageEnable
            VK_FALSE // VkBool32 alphaToOneEnable
        };

        VkViewport viewport = makeViewport(m_data.dim, m_data.dim);
        VkRect2D scissor = makeRect2D(m_data.dim, m_data.dim);

        VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            {2, 2}, // VkExtent2D fragmentSize;
            {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
             VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, // VkFragmentShadingRateCombinerOpKHR combinerOps[2];
        };

        const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
            DE_NULL, // const void* pNext
            (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
            1u, // uint32_t viewportCount
            &viewport, // const VkViewport* pViewports
            1u, // uint32_t scissorCount
            &scissor // const VkRect2D* pScissors
        };

        Move<VkShaderModule> fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
        Move<VkShaderModule> vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
        uint32_t numStages = 2u;

        const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
                VK_SHADER_STAGE_VERTEX_BIT, // stage
                *vs, // shader
                "main",
                DE_NULL, // pSpecializationInfo
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
                VK_SHADER_STAGE_FRAGMENT_BIT, // stage
                *fs, // shader
                "main",
                DE_NULL, // pSpecializationInfo
            }};

        const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
            VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
            shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL, // const void* pNext;
            (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
            numStages, // uint32_t stageCount;
            &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
            &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
            &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
            DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
            &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
            &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
            &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
            DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
            DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
            DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
            pipelineLayout.get(), // VkPipelineLayout layout;
            renderPass.get(), // VkRenderPass renderPass;
            0u, // uint32_t subpass;
            DE_NULL, // VkPipeline basePipelineHandle;
            0 // int basePipelineIndex;
        };

        pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
    }

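    // Transition the result image from UNDEFINED to GENERAL before it is cleared
    // and then accessed as a storage image by the fragment shader.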
    const VkImageMemoryBarrier imageBarrier = {
        VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
        DE_NULL, // const void* pNext
        0u, // VkAccessFlags srcAccessMask
        VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
        VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
        VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
        VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
        VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
        **image, // VkImage image
        {
            VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
            0u, // uint32_t baseMipLevel
            1u, // uint32_t levelCount
            0u, // uint32_t baseArrayLayer
            1u, // uint32_t layerCount
        }};

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                          (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
                          (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrier);

    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    VkClearValue clearColor = makeClearValueColorU32(0, 0, 0, 0);

    VkMemoryBarrier memBarrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        DE_NULL, // pNext
        0u, // srcAccessMask
        0u, // dstAccessMask
    };

    vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

    vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0);

    memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    memBarrier.dstAccessMask =
        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, 0, 1, &memBarrier, 0, DE_NULL,
                          0, DE_NULL);

    beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(m_data.dim, m_data.dim), 0, DE_NULL,
                    VK_SUBPASS_CONTENTS_INLINE);

    // Draw N fullscreen "quads", one per instance.
    uint32_t N = 32 / bitsPerQuad(m_data);
    uint32_t expectedValue = 0xFFFFFFFF;
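    // N * bitsPerQuad == 32, so once every instance has passed through the
    // interlock each word of the result must have all 32 bits set.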
    vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u);

    endRenderPass(vk, *cmdBuffer);

    memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memBarrier, 0, DE_NULL,
                          0, DE_NULL);

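    // Copy back only the region the shader actually wrote: sample interlock spreads
    // samples along x, and the 2x2 shading rate cases shrink the written area to a
    // quarter, mirroring the coordinate transform in the fragment shader.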
    uint32_t copyDimX = m_data.dim;
    uint32_t copyDimY = m_data.dim;

    if (m_data.isSampleInterlock())
        copyDimX *= m_data.samples;

    if (shadingRateEnable)
    {
        copyDimX /= 2;
        copyDimY /= 2;
    }

    if (m_data.resType == RES_IMAGE)
    {
        const VkBufferImageCopy copyRegion = makeBufferImageCopy(
            makeExtent3D(copyDimX, copyDimY, 1u), makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
        vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
    }
    else
    {
        const VkBufferCopy copyRegion = makeBufferCopy(0u, 0u, copyDimX * copyDimY * sizeof(uint32_t));
        vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, &copyRegion);
    }

    memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memBarrier, 0,
                          DE_NULL, 0, DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    uint32_t *ptr = (uint32_t *)copyBuffer->getAllocation().getHostPtr();
    invalidateAlloc(vk, device, copyBuffer->getAllocation());

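    // Every word of the result should have all 32 bits set. With killOdd, fragments
    // at odd x coordinates discard before storing, so the corresponding words (the
    // odd indices, since each row has an even number of entries) must still be zero.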
    qpTestResult res = QP_TEST_RESULT_PASS;

    for (uint32_t i = 0; i < copyDimX * copyDimY; ++i)
    {
        if (m_data.killOdd && (i & 1))
        {
            if (ptr[i] != 0)
                res = QP_TEST_RESULT_FAIL;
        }
        else if (ptr[i] != expectedValue)
            res = QP_TEST_RESULT_FAIL;
    }

    return tcu::TestStatus(res, qpGetTestResultName(res));
}

} // namespace

tcu::TestCaseGroup *createBasicTests(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic"));

    typedef struct
    {
        uint32_t count;
        const char *name;
    } TestGroupCase;

    TestGroupCase dimCases[] = {
        {8, "8x8"}, {16, "16x16"}, {32, "32x32"}, {64, "64x64"},
        {128, "128x128"}, {256, "256x256"}, {512, "512x512"}, {1024, "1024x1024"},
    };

    TestGroupCase resCases[] = {
        {RES_IMAGE, "image"},
        {RES_SSBO, "ssbo"},
    };

    TestGroupCase killCases[] = {
        {0, "nodiscard"},
        {1, "discard"},
    };

    TestGroupCase sampCases[] = {
        {1, "1xaa"},
        {4, "4xaa"},
    };

    TestGroupCase ssCases[] = {
        {0, "no_sample_shading"},
        {1, "sample_shading"},
    };

    TestGroupCase intCases[] = {
        {INT_PIXEL_ORDERED, "pixel_ordered"},
        {INT_PIXEL_UNORDERED, "pixel_unordered"},
        {INT_SAMPLE_ORDERED, "sample_ordered"},
        {INT_SAMPLE_UNORDERED, "sample_unordered"},
#ifndef CTS_USES_VULKANSC
        {INT_SHADING_RATE_ORDERED, "shading_rate_ordered"},
        {INT_SHADING_RATE_UNORDERED, "shading_rate_unordered"},
#endif // CTS_USES_VULKANSC
    };

    for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++)
    {
        de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name));
        for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++)
        {
            de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name));
            for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++)
            {
                de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name));
                for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++)
                {
                    de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name));
                    for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++)
                    {
                        de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name));
                        for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++)
                        {
                            CaseDef c = {
                                dimCases[dimNdx].count, // uint32_t dim;
                                (Resource)resCases[resNdx].count, // Resource resType;
                                (Interlock)intCases[intNdx].count, // Interlock interlock;
                                (VkSampleCountFlagBits)sampCases[sampNdx].count, // VkSampleCountFlagBits samples;
                                (bool)killCases[killNdx].count, // bool killOdd;
                                (bool)ssCases[ssNdx].count, // bool sampleShading;
                            };

                            if (c.sampleShading && c.samples == 1)
                                continue;

                            ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, c));
                        }
                        sampGroup->addChild(ssGroup.release());
                    }
                    intGroup->addChild(sampGroup.release());
                }
                resGroup->addChild(intGroup.release());
            }
            killGroup->addChild(resGroup.release());
        }
        group->addChild(killGroup.release());
    }
    return group.release();
}

} // namespace FragmentShaderInterlock
} // namespace vkt