/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Test Case Skeleton Based on Compute Shaders
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmComputeShaderCase.hpp"

#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"

#include "vktSpvAsmUtils.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"

#include <cassert>

namespace
{

using namespace vk;
using std::vector;

typedef vkt::SpirVAssembly::AllocationMp AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp AllocationSp;
typedef vk::Unique<VkBuffer> BufferHandleUp;
typedef vk::Unique<VkImage> ImageHandleUp;
typedef vk::Unique<VkImageView> ImageViewHandleUp;
typedef vk::Unique<VkSampler> SamplerHandleUp;
typedef de::SharedPtr<BufferHandleUp> BufferHandleSp;
typedef de::SharedPtr<ImageHandleUp> ImageHandleSp;
typedef de::SharedPtr<ImageViewHandleUp> ImageViewHandleSp;
typedef de::SharedPtr<SamplerHandleUp> SamplerHandleSp;

/*--------------------------------------------------------------------*//*!
 * \brief Create a buffer, allocate and bind memory for the buffer
 *
 * The memory is created as host visible and passed back as a vk::Allocation
 * instance via outMemory.
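 *
 * A minimal usage sketch (caller-side names here are illustrative, not taken
 * from this file): declare an AllocationMp for the returned memory, call this
 * helper with the descriptor type and byte size, then upload data with
 * setMemory():
 *
 *   AllocationMp mem;
 *   Move<VkBuffer> buf = createBufferAndBindMemory(context, vkdi, device,
 *       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, numBytes, &mem, false);
 *   setMemory(vkdi, device, &*mem, numBytes, srcData);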
 *//*--------------------------------------------------------------------*/
Move<VkBuffer> createBufferAndBindMemory(vkt::Context &context, const DeviceInterface &vkdi, const VkDevice &device,
                                         VkDescriptorType dtype, Allocator &allocator, size_t numBytes,
                                         AllocationMp *outMemory, bool physStorageBuffer, bool coherent = false)
{
    VkBufferUsageFlags usageFlags = (VkBufferUsageFlags)0u;

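    // Buffers that back PhysicalStorageBuffer accesses are referenced from the
    // shader via their 64-bit device address, so they additionally need the
    // device-address usage bit.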
    if (physStorageBuffer)
        usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;

    switch (dtype)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        usageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    default:
        DE_FATAL("Not implemented");
    }

    const VkBufferCreateInfo bufferCreateInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
        DE_NULL,                              // pNext
        0u,                                   // flags
        numBytes,                             // size
        usageFlags,                           // usage
        VK_SHARING_MODE_EXCLUSIVE,            // sharingMode
        0u,                                   // queueFamilyCount
        DE_NULL,                              // pQueueFamilyIndices
    };

    Move<VkBuffer> buffer(createBuffer(vkdi, device, &bufferCreateInfo));
    const VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *buffer);
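    // Allocate host-visible memory so the test can write inputs and read back
    // results directly; coherent memory is only requested when the test asks
    // for it, and device-address memory only for physical-storage-buffer use.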
    AllocationMp bufferMemory = allocator.allocate(
        requirements, (coherent ? MemoryRequirement::Coherent : MemoryRequirement::Any) |
                          (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && physStorageBuffer ?
                               MemoryRequirement::DeviceAddress :
                               MemoryRequirement::Any) |
                          MemoryRequirement::HostVisible);

    VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
    *outMemory = bufferMemory;

    return buffer;
}

/*--------------------------------------------------------------------*//*!
 * \brief Create image, allocate and bind memory for the image
 *
 *//*--------------------------------------------------------------------*/
Move<VkImage> createImageAndBindMemory(const DeviceInterface &vkdi, const VkDevice &device, VkDescriptorType dtype,
                                       vk::VkFormat imageFormat, Allocator &allocator, uint32_t queueFamilyIndex,
                                       AllocationMp *outMemory)
{
    VkImageUsageFlags usageBits = (VkImageUsageFlags)0;

    switch (dtype)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        usageBits = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    default:
        DE_FATAL("Not implemented");
    }

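    // All test images are small fixed-size 2D images (8x8, one mip level, one
    // layer); the extent matches the buffer-to-image copy region set up in
    // SpvAsmComputeShaderInstance::iterate().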
145
146 const VkImageCreateInfo resourceImageParams = {
147 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
148 DE_NULL, // const void* pNext;
149 0u, // VkImageCreateFlags flags;
150 VK_IMAGE_TYPE_2D, // VkImageType imageType;
151 imageFormat, // VkFormat format;
152 {8, 8, 1}, // VkExtent3D extent;
153 1u, // uint32_t mipLevels;
154 1u, // uint32_t arraySize;
155 VK_SAMPLE_COUNT_1_BIT, // uint32_t samples;
156 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
157 usageBits, // VkImageUsageFlags usage;
158 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
159 1u, // uint32_t queueFamilyCount;
160 &queueFamilyIndex, // const uint32_t* pQueueFamilyIndices;
161 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
162 };
163
164 // Create image
165 Move<VkImage> image = createImage(vkdi, device, &resourceImageParams);
166 const VkMemoryRequirements requirements = getImageMemoryRequirements(vkdi, device, *image);
167 de::MovePtr<Allocation> imageMemory = allocator.allocate(requirements, MemoryRequirement::Any);
168
169 VK_CHECK(vkdi.bindImageMemory(device, *image, imageMemory->getMemory(), imageMemory->getOffset()));
170 *outMemory = imageMemory;
171
172 return image;
173 }
174
setMemory(const DeviceInterface & vkdi,const VkDevice & device,Allocation * destAlloc,size_t numBytes,const void * data,bool coherent=false)175 void setMemory(const DeviceInterface &vkdi, const VkDevice &device, Allocation *destAlloc, size_t numBytes,
176 const void *data, bool coherent = false)
177 {
178 void *const hostPtr = destAlloc->getHostPtr();
179
180 deMemcpy((uint8_t *)hostPtr, data, numBytes);
181
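    // For non-coherent host-visible memory the write has to be flushed
    // explicitly before the device can observe it.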
    if (!coherent)
        flushAlloc(vkdi, device, *destAlloc);
}

void fillMemoryWithValue(const DeviceInterface &vkdi, const VkDevice &device, Allocation *destAlloc, size_t numBytes,
                         uint8_t value, bool coherent = false)
{
    void *const hostPtr = destAlloc->getHostPtr();

    deMemset((uint8_t *)hostPtr, value, numBytes);

    if (!coherent)
        flushAlloc(vkdi, device, *destAlloc);
}

void invalidateMemory(const DeviceInterface &vkdi, const VkDevice &device, Allocation *srcAlloc, bool coherent = false)
{
    if (!coherent)
        invalidateAlloc(vkdi, device, *srcAlloc);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set layout with the given descriptor types
 *
 * All descriptors are created for the compute pipeline.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout(const DeviceInterface &vkdi, const VkDevice &device,
                                                      const vector<VkDescriptorType> &dtypes)
{
    DescriptorSetLayoutBuilder builder;

    for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
        builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);

    return builder.build(vkdi, device);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a pipeline layout with one descriptor set
 *//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout(const DeviceInterface &vkdi, const VkDevice &device,
                                            VkDescriptorSetLayout descriptorSetLayout,
                                            const vkt::SpirVAssembly::BufferSp &pushConstants)
{
    VkPipelineLayoutCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
        DE_NULL,                                       // pNext
        (VkPipelineLayoutCreateFlags)0,
        1u,                   // descriptorSetCount
        &descriptorSetLayout, // pSetLayouts
        0u,                   // pushConstantRangeCount
        DE_NULL,              // pPushConstantRanges
    };

    VkPushConstantRange range = {
        VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags
        0,                           // offset
        0,                           // size
    };

    if (pushConstants != DE_NULL)
    {
        vector<uint8_t> pushConstantsBytes;
        pushConstants->getBytes(pushConstantsBytes);

        range.size = static_cast<uint32_t>(pushConstantsBytes.size());
        createInfo.pushConstantRangeCount = 1;
        createInfo.pPushConstantRanges = &range;
    }

    return createPipelineLayout(vkdi, device, &createInfo);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a one-time descriptor pool for one descriptor set that
 * supports the given descriptor types.
 *//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool(const DeviceInterface &vkdi, const VkDevice &device,
                                                   const vector<VkDescriptorType> &dtypes)
{
    DescriptorPoolBuilder builder;

    for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
        builder.addType(dtypes[typeNdx], 1);

    return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set
 *
 * The descriptor set's layout contains the given descriptor types,
 * sequentially bound to binding points starting from 0.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet(const DeviceInterface &vkdi, const VkDevice &device, VkDescriptorPool pool,
                                          VkDescriptorSetLayout layout, const vector<VkDescriptorType> &dtypes,
                                          const vector<VkDescriptorBufferInfo> &descriptorInfos,
                                          const vector<VkDescriptorImageInfo> &descriptorImageInfos)
{
    DE_ASSERT(dtypes.size() == descriptorInfos.size() + descriptorImageInfos.size());

    const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, pool, 1u,
                                                   &layout};

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vkdi, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    uint32_t bufferNdx = 0u;
    uint32_t imageNdx = 0u;

    for (uint32_t descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
    {
        switch (dtypes[descriptorNdx])
        {
        // Write buffer descriptor
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx),
                                dtypes[descriptorNdx], &descriptorInfos[bufferNdx++]);
            break;

        // Write image/sampler descriptor
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLER:
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx),
                                dtypes[descriptorNdx], &descriptorImageInfos[imageNdx++]);
            break;

        default:
            DE_FATAL("Not implemented");
        }
    }
    builder.update(vkdi, device);

    return descriptorSet;
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a compute pipeline based on the given shader
 *//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline(const DeviceInterface &vkdi, const VkDevice &device,
                                       VkPipelineLayout pipelineLayout, VkShaderModule shader, const char *entryPoint,
                                       const vkt::SpirVAssembly::SpecConstants &specConstants)
{
    const uint32_t numSpecConstants = (uint32_t)specConstants.getValuesCount();
    vector<VkSpecializationMapEntry> entries;
    VkSpecializationInfo specInfo;
    size_t offset = 0;
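
    // Pack all specialization constant values back to back; each map entry
    // records its byte offset into that packed blob and uses the constant's
    // index as its constant ID.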
    if (numSpecConstants != 0)
    {
        entries.resize(numSpecConstants);

        for (uint32_t ndx = 0; ndx < numSpecConstants; ++ndx)
        {
            const size_t valueSize = specConstants.getValueSize(ndx);

            entries[ndx].constantID = ndx;
            entries[ndx].offset = static_cast<uint32_t>(offset);
            entries[ndx].size = valueSize;

            offset += valueSize;
        }

        specInfo.mapEntryCount = numSpecConstants;
        specInfo.pMapEntries = &entries[0];
        specInfo.dataSize = offset;
        specInfo.pData = specConstants.getValuesBuffer();
    }

    const VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
        DE_NULL,                                             // pNext
        (VkPipelineShaderStageCreateFlags)0,                 // flags
        VK_SHADER_STAGE_COMPUTE_BIT,                         // stage
        shader,                                              // module
        entryPoint,                                          // pName
        (numSpecConstants == 0) ? DE_NULL : &specInfo,       // pSpecializationInfo
    };
    const VkComputePipelineCreateInfo pipelineCreateInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
        DE_NULL,                                        // pNext
        (VkPipelineCreateFlags)0,
        pipelineShaderStageCreateInfo, // cs
        pipelineLayout,                // layout
        (VkPipeline)0,                 // basePipelineHandle
        0u,                            // basePipelineIndex
    };

    return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}

} // namespace

namespace vkt
{
namespace SpirVAssembly
{

// ComputeShaderTestCase implementations

SpvAsmComputeShaderCase::SpvAsmComputeShaderCase(tcu::TestContext &testCtx, const char *name,
                                                 const ComputeShaderSpec &spec)
    : TestCase(testCtx, name)
    , m_shaderSpec(spec)
{
}

void SpvAsmComputeShaderCase::checkSupport(Context &context) const
{
    if (getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion) > context.getUsedApiVersion())
    {
        TCU_THROW(NotSupportedError, std::string("Vulkan higher than or equal to " +
                                                 getVulkanName(getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion)) +
                                                 " is required for this test to run")
                                         .c_str());
    }

    // Check all required extensions are supported
    for (const auto &ext : m_shaderSpec.extensions)
        context.requireDeviceFunctionality(ext);

    // Core features
    // Check that we're not skipping tests needlessly based on things that don't affect compute.
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fullDrawIndexUint32 == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.independentBlend == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.geometryShader == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.tessellationShader == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sampleRateShading == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.dualSrcBlend == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.logicOp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiDrawIndirect == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.drawIndirectFirstInstance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthClamp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBiasClamp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fillModeNonSolid == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBounds == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.wideLines == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.largePoints == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.alphaToOne == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiViewport == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.occlusionQueryPrecise == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fragmentStoresAndAtomics == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderTessellationAndGeometryPointSize == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderClipDistance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderCullDistance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sparseBinding == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.variableMultisampleRate == false);

    const char *unsupportedFeature = DE_NULL;
    if (!isVulkanFeaturesSupported(context, m_shaderSpec.requestedVulkanFeatures, &unsupportedFeature))
        TCU_THROW(NotSupportedError,
                  std::string("At least following requested feature is not supported: ") + unsupportedFeature);

    // Extension features
    if (m_shaderSpec.usesPhysStorageBuffer && !context.isBufferDeviceAddressSupported())
        TCU_THROW(NotSupportedError, "Requested physical storage buffer feature not supported");
}

void SpvAsmComputeShaderCase::initPrograms(SourceCollections &programCollection) const
{
    const auto &extensions = m_shaderSpec.extensions;
    const bool allowSpirv14 = (std::find(extensions.begin(), extensions.end(), "VK_KHR_spirv_1_4") != extensions.end());
    const bool allowMaintenance4 =
        (std::find(extensions.begin(), extensions.end(), "VK_KHR_maintenance4") != extensions.end());

    programCollection.spirvAsmSources.add("compute")
        << m_shaderSpec.assembly.c_str()
        << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, m_shaderSpec.spirvVersion, allowSpirv14,
                                allowMaintenance4);
}

TestInstance *SpvAsmComputeShaderCase::createInstance(Context &ctx) const
{
    return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec);
}

// ComputeShaderTestInstance implementations

SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance(Context &ctx, const ComputeShaderSpec &spec)
    : TestInstance(ctx)
    , m_shaderSpec(spec)
{
}

VkImageUsageFlags getMatchingComputeImageUsageFlags(VkDescriptorType dType)
{
    switch (dType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        return VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    default:
        DE_FATAL("Not implemented");
    }
    return (VkImageUsageFlags)0;
}

tcu::TestStatus SpvAsmComputeShaderInstance::iterate(void)
{
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    const VkDevice &device = m_context.getDevice();
    const DeviceInterface &vkdi = m_context.getDeviceInterface();
    Allocator &allocator = m_context.getDefaultAllocator();
    const VkQueue queue = m_context.getUniversalQueue();

    vector<AllocationSp> inputAllocs;
    vector<AllocationSp> outputAllocs;
    vector<BufferHandleSp> inputBuffers;
    vector<ImageHandleSp> inputImages;
    vector<ImageViewHandleSp> inputImageViews;
    vector<SamplerHandleSp> inputSamplers;
    vector<BufferHandleSp> outputBuffers;
    vector<VkDescriptorBufferInfo> descriptorInfos;
    vector<VkDescriptorImageInfo> descriptorImageInfos;
    vector<VkDescriptorType> descriptorTypes;

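    // Every compute test is expected to provide at least one output buffer to
    // verify against.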
    DE_ASSERT(!m_shaderSpec.outputs.empty());

    // Create command pool and command buffer

    const Unique<VkCommandPool> cmdPool(
        createCommandPool(vkdi, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
    Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Create buffer and image objects, allocate storage, and create view for all input/output buffers and images.

    for (uint32_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
    {
        const VkDescriptorType descType = m_shaderSpec.inputs[inputNdx].getDescriptorType();

        const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                                (descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
                                (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        descriptorTypes.push_back(descType);

        // Buffer
        if (!hasImage && !hasSampler)
        {
            const BufferSp &input = m_shaderSpec.inputs[inputNdx].getBuffer();
            vector<uint8_t> inputBytes;

            input->getBytes(inputBytes);

            const size_t numBytes = inputBytes.size();

            AllocationMp bufferAlloc;
            BufferHandleUp *buffer = new BufferHandleUp(
                createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc,
                                          m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));

            setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front(), m_shaderSpec.coherentMemory);
            inputBuffers.push_back(BufferHandleSp(buffer));
            inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
        }
        // Image
        else if (hasImage)
        {
            const BufferSp &input = m_shaderSpec.inputs[inputNdx].getBuffer();
            vector<uint8_t> inputBytes;

            input->getBytes(inputBytes);

            const size_t numBytes = inputBytes.size();

            AllocationMp bufferAlloc;
            BufferHandleUp *buffer =
                new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes,
                                                             &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer));

            AllocationMp imageAlloc;
            ImageHandleUp *image = new ImageHandleUp(createImageAndBindMemory(
                vkdi, device, descType, m_shaderSpec.inputFormat, allocator, queueFamilyIndex, &imageAlloc));

            setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front());

            inputBuffers.push_back(BufferHandleSp(buffer));
            inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

            inputImages.push_back(ImageHandleSp(image));
            inputAllocs.push_back(de::SharedPtr<Allocation>(imageAlloc.release()));

            const VkImageLayout imageLayout = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ?
                                                  VK_IMAGE_LAYOUT_GENERAL :
                                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
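            // Upload the input through the staging buffer created above: the
            // whole 8x8 image is copied in one region and left in the layout
            // the shader will access it in.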
            const VkBufferImageCopy copyRegion = {
                0u, // VkDeviceSize bufferOffset;
                0u, // uint32_t bufferRowLength;
                0u, // uint32_t bufferImageHeight;
                {
                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspect;
                    0u,                        // uint32_t mipLevel;
                    0u,                        // uint32_t baseArrayLayer;
                    1u,                        // uint32_t layerCount;
                },         // VkImageSubresourceLayers imageSubresource;
                {0, 0, 0}, // VkOffset3D imageOffset;
                {8, 8, 1}  // VkExtent3D imageExtent;
            };
            vector<VkBufferImageCopy> copyRegions;
            copyRegions.push_back(copyRegion);

            copyBufferToImage(vkdi, device, queue, queueFamilyIndex, buffer->get(), (uint32_t)numBytes, copyRegions,
                              DE_NULL, VK_IMAGE_ASPECT_COLOR_BIT, 1u, 1u, image->get(), imageLayout);
        }
    }

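    // Second pass over the inputs: create image views and samplers where
    // needed and build the descriptor infos in binding order.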
    uint32_t imageNdx = 0u;
    uint32_t bufferNdx = 0u;

    for (uint32_t inputNdx = 0; inputNdx < descriptorTypes.size(); ++inputNdx)
    {
        const VkDescriptorType descType = descriptorTypes[inputNdx];

        const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                                (descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
                                (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        // Create image view and sampler
        if (hasImage)
        {
            const VkImageViewCreateInfo imgViewParams = {
                VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                  // const void* pNext;
                0u,                                       // VkImageViewCreateFlags flags;
                **inputImages[imageNdx++],                // VkImage image;
                VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
                m_shaderSpec.inputFormat,                 // VkFormat format;
                {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B,
                 VK_COMPONENT_SWIZZLE_A}, // VkChannelMapping channels;
                {
                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                    0u,                        // uint32_t baseMipLevel;
                    1u,                        // uint32_t mipLevels;
                    0u,                        // uint32_t baseArrayLayer;
                    1u,                        // uint32_t arraySize;
                }, // VkImageSubresourceRange subresourceRange;
            };

            Move<VkImageView> imgView(createImageView(vkdi, device, &imgViewParams));
            inputImageViews.push_back(ImageViewHandleSp(new ImageViewHandleUp(imgView)));
        }

        if (hasSampler)
        {
            const VkSamplerCreateInfo samplerParams = {
                VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                               // const void* pNext;
                0,                                     // VkSamplerCreateFlags flags;
                VK_FILTER_NEAREST,                     // VkFilter magFilter;
                VK_FILTER_NEAREST,                     // VkFilter minFilter;
                VK_SAMPLER_MIPMAP_MODE_NEAREST,        // VkSamplerMipmapMode mipmapMode;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
                0.0f,                                  // float mipLodBias;
                VK_FALSE,                              // VkBool32 anisotropyEnable;
                1.0f,                                  // float maxAnisotropy;
                VK_FALSE,                              // VkBool32 compareEnable;
                VK_COMPARE_OP_ALWAYS,                  // VkCompareOp compareOp;
                0.0f,                                  // float minLod;
                0.0f,                                  // float maxLod;
                VK_BORDER_COLOR_INT_OPAQUE_BLACK,      // VkBorderColor borderColor;
                VK_FALSE                               // VkBool32 unnormalizedCoordinates;
            };

            Move<VkSampler> sampler(createSampler(vkdi, device, &samplerParams));
            inputSamplers.push_back(SamplerHandleSp(new SamplerHandleUp(sampler)));
        }

665
666 // Create descriptor buffer and image infos
667 switch (descType)
668 {
669 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
670 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
671 {
672 const VkDescriptorBufferInfo bufInfo = {
673 **inputBuffers[bufferNdx++], // VkBuffer buffer;
674 0, // VkDeviceSize offset;
675 VK_WHOLE_SIZE, // VkDeviceSize size;
676 };
677
678 descriptorInfos.push_back(bufInfo);
679 break;
680 }
681
682 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
683 {
684 const VkDescriptorImageInfo imgInfo = {
685 DE_NULL, // VkSampler sampler;
686 **inputImageViews.back(), // VkImageView imageView;
687 VK_IMAGE_LAYOUT_GENERAL // VkImageLayout imageLayout;
688 };
689
690 descriptorImageInfos.push_back(imgInfo);
691 break;
692 }
693
694 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
695 {
696 const VkDescriptorImageInfo imgInfo = {
697 DE_NULL, // VkSampler sampler;
698 **inputImageViews.back(), // VkImageView imageView;
699 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
700 };
701
702 descriptorImageInfos.push_back(imgInfo);
703 break;
704 }
705
706 case VK_DESCRIPTOR_TYPE_SAMPLER:
707 {
708 const VkDescriptorImageInfo imgInfo = {
709 **inputSamplers.back(), // VkSampler sampler;
710 DE_NULL, // VkImageView imageView;
711 VK_IMAGE_LAYOUT_GENERAL // VkImageLayout imageLayout;
712 };
713
714 descriptorImageInfos.push_back(imgInfo);
715 break;
716 }
717
718 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
719 {
720 const VkDescriptorImageInfo imgInfo = {
721 **inputSamplers.back(), // VkSampler sampler;
722 **inputImageViews.back(), // VkImageView imageView;
723 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
724 };
725
726 descriptorImageInfos.push_back(imgInfo);
727 break;
728 }
729
730 default:
731 DE_FATAL("Not implemented");
732 }
733 }
734
735 for (uint32_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
736 {
737 DE_ASSERT(m_shaderSpec.outputs[outputNdx].getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
738
739 descriptorTypes.push_back(m_shaderSpec.outputs[outputNdx].getDescriptorType());
740
741 AllocationMp alloc;
742 const BufferSp &output = m_shaderSpec.outputs[outputNdx].getBuffer();
743 vector<uint8_t> outputBytes;
744
745 output->getBytes(outputBytes);
746
747 const size_t numBytes = outputBytes.size();
748 BufferHandleUp *buffer = new BufferHandleUp(
749 createBufferAndBindMemory(m_context, vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc,
750 m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));
751
752 fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff, m_shaderSpec.coherentMemory);
753 descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
754 outputBuffers.push_back(BufferHandleSp(buffer));
755 outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
756 }
757
    std::vector<VkDeviceAddress> gpuAddrs;
    // Query the buffer device addresses, write them into a new buffer, and replace
    // all the descriptors with just a descriptor to this new buffer.
    if (m_shaderSpec.usesPhysStorageBuffer)
    {
        VkBufferDeviceAddressInfo info{
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            DE_NULL,                                      // const void* pNext;
            0,                                            // VkBuffer buffer
        };

        for (uint32_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
        {
            info.buffer = **inputBuffers[inputNdx];
            VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

            gpuAddrs.push_back(addr);
        }
        for (uint32_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
        {
            info.buffer = **outputBuffers[outputNdx];
            VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

            gpuAddrs.push_back(addr);
        }

        descriptorInfos.clear();
        descriptorTypes.clear();
        descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
        const size_t numBytes = gpuAddrs.size() * sizeof(VkDeviceAddress);

        AllocationMp bufferAlloc;
        BufferHandleUp *buffer = new BufferHandleUp(
            createBufferAndBindMemory(m_context, vkdi, device, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, numBytes,
                                      &bufferAlloc, false, m_shaderSpec.coherentMemory));

        setMemory(vkdi, device, &*bufferAlloc, numBytes, &gpuAddrs.front(), m_shaderSpec.coherentMemory);
        inputBuffers.push_back(BufferHandleSp(buffer));
        inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

        descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
    }

    // Create layouts and descriptor set.

    Unique<VkDescriptorSetLayout> descriptorSetLayout(createDescriptorSetLayout(vkdi, device, descriptorTypes));
    Unique<VkPipelineLayout> pipelineLayout(
        createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
    Unique<VkDescriptorPool> descriptorPool(createDescriptorPool(vkdi, device, descriptorTypes));
    Unique<VkDescriptorSet> descriptorSet(createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout,
                                                              descriptorTypes, descriptorInfos, descriptorImageInfos));

    // Create compute shader and pipeline.

    const ProgramBinary &binary = m_context.getBinaryCollection().get("compute");
    if (m_shaderSpec.verifyBinary && !m_shaderSpec.verifyBinary(binary))
    {
        return tcu::TestStatus::fail("Binary verification of SPIR-V in the test failed");
    }
    Unique<VkShaderModule> module(createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));

    Unique<VkPipeline> computePipeline(createComputePipeline(
        vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));

    // Create command buffer and record commands

    const tcu::IVec3 &numWorkGroups = m_shaderSpec.numWorkGroups;

    beginCommandBuffer(vkdi, *cmdBuffer);
    vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
    vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(),
                               0, DE_NULL);
    if (m_shaderSpec.pushConstants != DE_NULL)
    {
        vector<uint8_t> pushConstantsBytes;
        m_shaderSpec.pushConstants->getBytes(pushConstantsBytes);

        const uint32_t size = static_cast<uint32_t>(pushConstantsBytes.size());
        const void *data = &pushConstantsBytes.front();

        vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0,
                              /* size = */ size, data);
    }
    vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());

    // Insert a barrier so data written by the shader is available to the host
    for (uint32_t outputBufferNdx = 0; outputBufferNdx < outputBuffers.size(); ++outputBufferNdx)
    {
        const VkBufferMemoryBarrier buf_barrier = {
            VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
            DE_NULL,                                 // const void* pNext;
            VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags srcAccessMask;
            VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags dstAccessMask;
            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t srcQueueFamilyIndex;
            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t dstQueueFamilyIndex;
            **outputBuffers[outputBufferNdx],        // VkBuffer buffer;
            0,                                       // VkDeviceSize offset;
            VK_WHOLE_SIZE                            // VkDeviceSize size;
        };

        vkdi.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0,
                                DE_NULL, 1, &buf_barrier, 0, DE_NULL);
    }
    endCommandBuffer(vkdi, *cmdBuffer);

    submitCommandsAndWait(vkdi, device, queue, *cmdBuffer);
    m_context.resetCommandPoolForVKSC(device, *cmdPool);

    // Invalidate output memory ranges before checking on host.
    for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
    {
        invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.coherentMemory);
    }

    // Check output.
    if (m_shaderSpec.verifyIO)
    {
        if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs,
                                      m_context.getTestContext().getLog()))
            return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
    }
    else
    {
        for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
        {
            const BufferSp &expectedOutput = m_shaderSpec.outputs[outputNdx].getBuffer();
            vector<uint8_t> expectedBytes;

            expectedOutput->getBytes(expectedBytes);

            if (deMemCmp(&expectedBytes.front(), outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size()))
            {
                const size_t errorsMax = 16u;
                const uint8_t *ptrHost = static_cast<uint8_t *>(outputAllocs[outputNdx]->getHostPtr());
                const uint8_t *ptrExpected = static_cast<uint8_t *>(&expectedBytes.front());
                size_t errors = 0u;
                size_t ndx = 0u;

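                // Scan to the first mismatching byte, then keep scanning and
                // log up to errorsMax individual differences.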
                for (; ndx < expectedBytes.size(); ++ndx)
                {
                    if (ptrHost[ndx] != ptrExpected[ndx])
                        break;
                }

                for (; ndx < expectedBytes.size(); ++ndx)
                {
                    if (ptrHost[ndx] != ptrExpected[ndx])
                    {
                        m_context.getTestContext().getLog()
                            << tcu::TestLog::Message << "OutputBuffer:" << outputNdx
                            << " got:" << ((uint32_t)ptrHost[ndx]) << " expected:" << ((uint32_t)ptrExpected[ndx])
                            << " at byte " << ndx << tcu::TestLog::EndMessage;
                        errors++;

                        if (errors >= errorsMax)
                        {
                            m_context.getTestContext().getLog()
                                << tcu::TestLog::Message << "Maximum error count reached (" << errors
                                << "). Stop output." << tcu::TestLog::EndMessage;
                            break;
                        }
                    }
                }

                return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
            }
        }
    }

    return tcu::TestStatus::pass("Output match with expected");
}

} // namespace SpirVAssembly
} // namespace vkt