/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Test Case Skeleton Based on Compute Shaders
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmComputeShaderCase.hpp"

#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"

#include "vktSpvAsmUtils.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"

#include <cassert>

namespace
{

using namespace vk;
using std::vector;

typedef vkt::SpirVAssembly::AllocationMp AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp AllocationSp;
typedef vk::Unique<VkBuffer> BufferHandleUp;
typedef vk::Unique<VkImage> ImageHandleUp;
typedef vk::Unique<VkImageView> ImageViewHandleUp;
typedef vk::Unique<VkSampler> SamplerHandleUp;
typedef de::SharedPtr<BufferHandleUp> BufferHandleSp;
typedef de::SharedPtr<ImageHandleUp> ImageHandleSp;
typedef de::SharedPtr<ImageViewHandleUp> ImageViewHandleSp;
typedef de::SharedPtr<SamplerHandleUp> SamplerHandleSp;

/*--------------------------------------------------------------------*//*!
 * \brief Create a buffer, allocate and bind memory for the buffer
 *
 * The memory is created as host visible and passed back as a vk::Allocation
 * instance via outMemory.
 *//*--------------------------------------------------------------------*/
Move<VkBuffer> createBufferAndBindMemory(vkt::Context &context, const DeviceInterface &vkdi, const VkDevice &device,
                                         VkDescriptorType dtype, Allocator &allocator, size_t numBytes,
                                         AllocationMp *outMemory, bool physStorageBuffer, bool coherent = false)
{
    VkBufferUsageFlags usageFlags = (VkBufferUsageFlags)0u;

    if (physStorageBuffer)
        usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;

    switch (dtype)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        usageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        break;
    default:
        DE_FATAL("Not implemented");
    }

    const VkBufferCreateInfo bufferCreateInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
        DE_NULL,                              // pNext
        0u,                                   // flags
        numBytes,                             // size
        usageFlags,                           // usage
        VK_SHARING_MODE_EXCLUSIVE,            // sharingMode
        0u,                                   // queueFamilyIndexCount
        DE_NULL,                              // pQueueFamilyIndices
    };

    Move<VkBuffer> buffer(createBuffer(vkdi, device, &bufferCreateInfo));
    const VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *buffer);
    AllocationMp bufferMemory               = allocator.allocate(
        requirements, (coherent ? MemoryRequirement::Coherent : MemoryRequirement::Any) |
                          (context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && physStorageBuffer ?
                                             MemoryRequirement::DeviceAddress :
                                             MemoryRequirement::Any) |
                          MemoryRequirement::HostVisible);

    VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
    *outMemory = bufferMemory;

    return buffer;
}
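
// Usage sketch for the helper above (illustrative only; the handle names and
// the 256-byte size are assumptions, not values taken from this file):
//
//     AllocationMp mem;
//     Move<VkBuffer> buf = createBufferAndBindMemory(context, vkdi, device,
//         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, 256u, &mem,
//         false /* physStorageBuffer */);
//
// The returned allocation is host visible, so input data can be written
// straight through mem->getHostPtr() (see setMemory() below).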

/*--------------------------------------------------------------------*//*!
 * \brief Create image, allocate and bind memory for the image
 *
 *//*--------------------------------------------------------------------*/
Move<VkImage> createImageAndBindMemory(const DeviceInterface &vkdi, const VkDevice &device, VkDescriptorType dtype,
                                       vk::VkFormat imageFormat, Allocator &allocator, uint32_t queueFamilyIndex,
                                       AllocationMp *outMemory)
{
    VkImageUsageFlags usageBits = (VkImageUsageFlags)0;

    switch (dtype)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        usageBits = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        break;
    default:
        DE_FATAL("Not implemented");
    }

    const VkImageCreateInfo resourceImageParams = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                             // const void* pNext;
        0u,                                  // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        imageFormat,                         // VkFormat format;
        {8, 8, 1},                           // VkExtent3D extent;
        1u,                                  // uint32_t mipLevels;
        1u,                                  // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        usageBits,                           // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        1u,                                  // uint32_t queueFamilyIndexCount;
        &queueFamilyIndex,                   // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
    };

    // Create image
    Move<VkImage> image                     = createImage(vkdi, device, &resourceImageParams);
    const VkMemoryRequirements requirements = getImageMemoryRequirements(vkdi, device, *image);
    de::MovePtr<Allocation> imageMemory     = allocator.allocate(requirements, MemoryRequirement::Any);

    VK_CHECK(vkdi.bindImageMemory(device, *image, imageMemory->getMemory(), imageMemory->getOffset()));
    *outMemory = imageMemory;

    return image;
}
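
// Note: the image is created with optimal tiling and
// VK_IMAGE_USAGE_TRANSFER_DST_BIT, so its texels cannot be initialized through
// a host pointer; iterate() below uploads the initial data by copying from a
// host-visible staging buffer with copyBufferToImage().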

void setMemory(const DeviceInterface &vkdi, const VkDevice &device, Allocation *destAlloc, size_t numBytes,
               const void *data, bool coherent = false)
{
    void *const hostPtr = destAlloc->getHostPtr();

    deMemcpy((uint8_t *)hostPtr, data, numBytes);

    if (!coherent)
        flushAlloc(vkdi, device, *destAlloc);
}

void fillMemoryWithValue(const DeviceInterface &vkdi, const VkDevice &device, Allocation *destAlloc, size_t numBytes,
                         uint8_t value, bool coherent = false)
{
    void *const hostPtr = destAlloc->getHostPtr();

    deMemset((uint8_t *)hostPtr, value, numBytes);

    if (!coherent)
        flushAlloc(vkdi, device, *destAlloc);
}

void invalidateMemory(const DeviceInterface &vkdi, const VkDevice &device, Allocation *srcAlloc, bool coherent = false)
{
    if (!coherent)
        invalidateAlloc(vkdi, device, *srcAlloc);
}
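
// The three helpers above implement the usual host/device handshake for
// non-coherent memory: host writes are followed by a flush, host reads are
// preceded by an invalidate, and both are skipped for coherent allocations.
// A minimal round-trip sketch (handle names are illustrative):
//
//     setMemory(vkdi, device, &*inAlloc, size, inputData); // write + flush
//     // ... submit and wait for work that reads inAlloc, writes outAlloc ...
//     invalidateMemory(vkdi, device, &*outAlloc);          // before host read
//     deMemCmp(expected, outAlloc->getHostPtr(), size);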

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set layout with the given descriptor types
 *
 * All descriptors are created for the compute pipeline.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout(const DeviceInterface &vkdi, const VkDevice &device,
                                                      const vector<VkDescriptorType> &dtypes)
{
    DescriptorSetLayoutBuilder builder;

    for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
        builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);

    return builder.build(vkdi, device);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a pipeline layout with one descriptor set
 *//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout(const DeviceInterface &vkdi, const VkDevice &device,
                                            VkDescriptorSetLayout descriptorSetLayout,
                                            const vkt::SpirVAssembly::BufferSp &pushConstants)
{
    VkPipelineLayoutCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
        DE_NULL,                                       // pNext
        (VkPipelineLayoutCreateFlags)0,                // flags
        1u,                   // setLayoutCount
        &descriptorSetLayout, // pSetLayouts
        0u,                   // pushConstantRangeCount
        DE_NULL,              // pPushConstantRanges
    };

    VkPushConstantRange range = {
        VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags
        0,                           // offset
        0,                           // size
    };

    if (pushConstants != DE_NULL)
    {
        vector<uint8_t> pushConstantsBytes;
        pushConstants->getBytes(pushConstantsBytes);

        range.size                        = static_cast<uint32_t>(pushConstantsBytes.size());
        createInfo.pushConstantRangeCount = 1;
        createInfo.pPushConstantRanges    = &range;
    }

    return createPipelineLayout(vkdi, device, &createInfo);
}
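
// Sketch: if the test spec supplies, say, a 16-byte push-constant buffer (an
// assumed size, for illustration), the layout above gets a single
// VkPushConstantRange with stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
// offset = 0 and size = 16; iterate() later uploads the bytes over exactly
// that range with vkCmdPushConstants().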

/*--------------------------------------------------------------------*//*!
 * \brief Create a one-time descriptor pool for one descriptor set that
 * supports the given descriptor types.
 *//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool(const DeviceInterface &vkdi, const VkDevice &device,
                                                   const vector<VkDescriptorType> &dtypes)
{
    DescriptorPoolBuilder builder;

    for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
        builder.addType(dtypes[typeNdx], 1);

    return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set
 *
 * The descriptor set's layout contains the given descriptor types,
 * sequentially bound to binding points starting from 0.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet(const DeviceInterface &vkdi, const VkDevice &device, VkDescriptorPool pool,
                                          VkDescriptorSetLayout layout, const vector<VkDescriptorType> &dtypes,
                                          const vector<VkDescriptorBufferInfo> &descriptorInfos,
                                          const vector<VkDescriptorImageInfo> &descriptorImageInfos)
{
    DE_ASSERT(dtypes.size() == descriptorInfos.size() + descriptorImageInfos.size());

    const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, pool, 1u,
                                                   &layout};

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vkdi, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    uint32_t bufferNdx = 0u;
    uint32_t imageNdx  = 0u;

    for (uint32_t descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
    {
        switch (dtypes[descriptorNdx])
        {
        // Write buffer descriptor
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx),
                                dtypes[descriptorNdx], &descriptorInfos[bufferNdx++]);
            break;

        // Write image/sampler descriptor
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLER:
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx),
                                dtypes[descriptorNdx], &descriptorImageInfos[imageNdx++]);
            break;

        default:
            DE_FATAL("Not implemented");
        }
    }
    builder.update(vkdi, device);

    return descriptorSet;
}
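
// Taken together, the descriptor helpers are used in iterate() roughly like
// this (sketch; dtypes, bufferInfos and imageInfos stand for the vectors
// built from the test spec):
//
//     Unique<VkDescriptorSetLayout> layout(createDescriptorSetLayout(vkdi, device, dtypes));
//     Unique<VkDescriptorPool> pool(createDescriptorPool(vkdi, device, dtypes));
//     Unique<VkDescriptorSet> set(createDescriptorSet(vkdi, device, *pool, *layout,
//                                                     dtypes, bufferInfos, imageInfos));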

/*--------------------------------------------------------------------*//*!
 * \brief Create a compute pipeline based on the given shader
 *//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline(const DeviceInterface &vkdi, const VkDevice &device,
                                       VkPipelineLayout pipelineLayout, VkShaderModule shader, const char *entryPoint,
                                       const vkt::SpirVAssembly::SpecConstants &specConstants)
{
    const uint32_t numSpecConstants = (uint32_t)specConstants.getValuesCount();
    vector<VkSpecializationMapEntry> entries;
    VkSpecializationInfo specInfo;
    size_t offset = 0;

    if (numSpecConstants != 0)
    {
        entries.resize(numSpecConstants);

        for (uint32_t ndx = 0; ndx < numSpecConstants; ++ndx)
        {
            const size_t valueSize = specConstants.getValueSize(ndx);

            entries[ndx].constantID = ndx;
            entries[ndx].offset     = static_cast<uint32_t>(offset);
            entries[ndx].size       = valueSize;

            offset += valueSize;
        }

        specInfo.mapEntryCount = numSpecConstants;
        specInfo.pMapEntries   = &entries[0];
        specInfo.dataSize      = offset;
        specInfo.pData         = specConstants.getValuesBuffer();
    }

    const VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
        DE_NULL,                                             // pNext
        (VkPipelineShaderStageCreateFlags)0,                 // flags
        VK_SHADER_STAGE_COMPUTE_BIT,                         // stage
        shader,                                              // module
        entryPoint,                                          // pName
        (numSpecConstants == 0) ? DE_NULL : &specInfo,       // pSpecializationInfo
    };
    const VkComputePipelineCreateInfo pipelineCreateInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
        DE_NULL,                                        // pNext
        (VkPipelineCreateFlags)0,                       // flags
        pipelineShaderStageCreateInfo,                  // stage
        pipelineLayout,                                 // layout
        (VkPipeline)0,                                  // basePipelineHandle
        0u,                                             // basePipelineIndex
    };

    return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}
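
// Packing example for the specialization data above: two spec constants of 4
// and 8 bytes (sizes chosen for illustration) produce the map entries
// {constantID = 0, offset = 0, size = 4} and {constantID = 1, offset = 4,
// size = 8}, with dataSize = 12. IDs are assigned sequentially, so the SPIR-V
// module must decorate its OpSpecConstant* instructions with SpecId 0, 1, ...
// in the same order as the values in the SpecConstants container.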

} // namespace

namespace vkt
{
namespace SpirVAssembly
{

// ComputeShaderTestCase implementations

SpvAsmComputeShaderCase::SpvAsmComputeShaderCase(tcu::TestContext &testCtx, const char *name,
                                                 const ComputeShaderSpec &spec)
    : TestCase(testCtx, name)
    , m_shaderSpec(spec)
{
}

void SpvAsmComputeShaderCase::checkSupport(Context &context) const
{
    if (getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion) > context.getUsedApiVersion())
    {
        TCU_THROW(NotSupportedError,
                  std::string("Vulkan " + getVulkanName(getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion)) +
                              " or higher is required for this test to run")
                      .c_str());
    }

    // Check all required extensions are supported
    for (const auto &ext : m_shaderSpec.extensions)
        context.requireDeviceFunctionality(ext);

    // Core features
    // Check that we're not skipping tests needlessly based on things that don't affect compute.
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fullDrawIndexUint32 == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.independentBlend == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.geometryShader == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.tessellationShader == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sampleRateShading == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.dualSrcBlend == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.logicOp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiDrawIndirect == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.drawIndirectFirstInstance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthClamp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBiasClamp == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fillModeNonSolid == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBounds == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.wideLines == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.largePoints == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.alphaToOne == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiViewport == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.occlusionQueryPrecise == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fragmentStoresAndAtomics == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderTessellationAndGeometryPointSize == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderClipDistance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderCullDistance == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sparseBinding == false);
    assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.variableMultisampleRate == false);

    const char *unsupportedFeature = DE_NULL;
    if (!isVulkanFeaturesSupported(context, m_shaderSpec.requestedVulkanFeatures, &unsupportedFeature))
        TCU_THROW(NotSupportedError,
                  std::string("At least the following requested feature is not supported: ") + unsupportedFeature);

    // Extension features
    if (m_shaderSpec.usesPhysStorageBuffer && !context.isBufferDeviceAddressSupported())
        TCU_THROW(NotSupportedError, "Requested physical storage buffer feature not supported");
}

void SpvAsmComputeShaderCase::initPrograms(SourceCollections &programCollection) const
{
    const auto &extensions  = m_shaderSpec.extensions;
    const bool allowSpirv14 = (std::find(extensions.begin(), extensions.end(), "VK_KHR_spirv_1_4") != extensions.end());
    const bool allowMaintenance4 =
        (std::find(extensions.begin(), extensions.end(), "VK_KHR_maintenance4") != extensions.end());

    programCollection.spirvAsmSources.add("compute")
        << m_shaderSpec.assembly.c_str()
        << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, m_shaderSpec.spirvVersion, allowSpirv14,
                                allowMaintenance4);
}

TestInstance *SpvAsmComputeShaderCase::createInstance(Context &ctx) const
{
    return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec);
}

// ComputeShaderTestInstance implementations

SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance(Context &ctx, const ComputeShaderSpec &spec)
    : TestInstance(ctx)
    , m_shaderSpec(spec)
{
}

VkImageUsageFlags getMatchingComputeImageUsageFlags(VkDescriptorType dType)
{
    switch (dType)
    {
    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        return VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    default:
        DE_FATAL("Not implemented");
    }
    return (VkImageUsageFlags)0;
}
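
// Note: this mirrors the usage-bit switch in createImageAndBindMemory() above.
// It lives in the vkt::SpirVAssembly namespace rather than the anonymous one,
// presumably so other test sources can share it; nothing in this file calls it.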

tcu::TestStatus SpvAsmComputeShaderInstance::iterate(void)
{
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    const VkDevice &device          = m_context.getDevice();
    const DeviceInterface &vkdi     = m_context.getDeviceInterface();
    Allocator &allocator            = m_context.getDefaultAllocator();
    const VkQueue queue             = m_context.getUniversalQueue();

    vector<AllocationSp> inputAllocs;
    vector<AllocationSp> outputAllocs;
    vector<BufferHandleSp> inputBuffers;
    vector<ImageHandleSp> inputImages;
    vector<ImageViewHandleSp> inputImageViews;
    vector<SamplerHandleSp> inputSamplers;
    vector<BufferHandleSp> outputBuffers;
    vector<VkDescriptorBufferInfo> descriptorInfos;
    vector<VkDescriptorImageInfo> descriptorImageInfos;
    vector<VkDescriptorType> descriptorTypes;

    DE_ASSERT(!m_shaderSpec.outputs.empty());

    // Create command pool and command buffer

    const Unique<VkCommandPool> cmdPool(
        createCommandPool(vkdi, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
    Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Create buffer and image objects, allocate storage, and create views for all input/output buffers and images.

    for (uint32_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
    {
        const VkDescriptorType descType = m_shaderSpec.inputs[inputNdx].getDescriptorType();

        const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                                (descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
                                (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        descriptorTypes.push_back(descType);

        // Buffer
        if (!hasImage && !hasSampler)
        {
            const BufferSp &input = m_shaderSpec.inputs[inputNdx].getBuffer();
            vector<uint8_t> inputBytes;

            input->getBytes(inputBytes);

            const size_t numBytes = inputBytes.size();

            AllocationMp bufferAlloc;
            BufferHandleUp *buffer = new BufferHandleUp(
                createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc,
                                          m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));

            setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front(), m_shaderSpec.coherentMemory);
            inputBuffers.push_back(BufferHandleSp(buffer));
            inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
        }
        // Image
        else if (hasImage)
        {
            const BufferSp &input = m_shaderSpec.inputs[inputNdx].getBuffer();
            vector<uint8_t> inputBytes;

            input->getBytes(inputBytes);

            const size_t numBytes = inputBytes.size();

            AllocationMp bufferAlloc;
            BufferHandleUp *buffer =
                new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes,
                                                             &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer));

            AllocationMp imageAlloc;
            ImageHandleUp *image = new ImageHandleUp(createImageAndBindMemory(
                vkdi, device, descType, m_shaderSpec.inputFormat, allocator, queueFamilyIndex, &imageAlloc));

            setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front());

            inputBuffers.push_back(BufferHandleSp(buffer));
            inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

            inputImages.push_back(ImageHandleSp(image));
            inputAllocs.push_back(de::SharedPtr<Allocation>(imageAlloc.release()));

            const VkImageLayout imageLayout    = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ?
                                                     VK_IMAGE_LAYOUT_GENERAL :
                                                     VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
            const VkBufferImageCopy copyRegion = {
                0u, // VkDeviceSize bufferOffset;
                0u, // uint32_t bufferRowLength;
                0u, // uint32_t bufferImageHeight;
                {
                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                    0u,                        // uint32_t mipLevel;
                    0u,                        // uint32_t baseArrayLayer;
                    1u,                        // uint32_t layerCount;
                },                             // VkImageSubresourceLayers imageSubresource;
                {0, 0, 0},                     // VkOffset3D imageOffset;
                {8, 8, 1}                      // VkExtent3D imageExtent;
            };
            vector<VkBufferImageCopy> copyRegions;
            copyRegions.push_back(copyRegion);

            copyBufferToImage(vkdi, device, queue, queueFamilyIndex, buffer->get(), (uint32_t)numBytes, copyRegions,
                              DE_NULL, VK_IMAGE_ASPECT_COLOR_BIT, 1u, 1u, image->get(), imageLayout);
        }
    }

    uint32_t imageNdx  = 0u;
    uint32_t bufferNdx = 0u;

    for (uint32_t inputNdx = 0; inputNdx < descriptorTypes.size(); ++inputNdx)
    {
        const VkDescriptorType descType = descriptorTypes[inputNdx];

        const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                              (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                                (descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
                                (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

        // Create image view and sampler
        if (hasImage)
        {
            const VkImageViewCreateInfo imgViewParams = {
                VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                  // const void* pNext;
                0u,                                       // VkImageViewCreateFlags flags;
                **inputImages[imageNdx++],                // VkImage image;
                VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
                m_shaderSpec.inputFormat,                 // VkFormat format;
                {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B,
                 VK_COMPONENT_SWIZZLE_A}, // VkComponentMapping components;
                {
                    VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                    0u,                        // uint32_t baseMipLevel;
                    1u,                        // uint32_t levelCount;
                    0u,                        // uint32_t baseArrayLayer;
                    1u,                        // uint32_t layerCount;
                },                             // VkImageSubresourceRange subresourceRange;
            };

            Move<VkImageView> imgView(createImageView(vkdi, device, &imgViewParams));
            inputImageViews.push_back(ImageViewHandleSp(new ImageViewHandleUp(imgView)));
        }

        if (hasSampler)
        {
            const VkSamplerCreateInfo samplerParams = {
                VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                               // const void* pNext;
                0,                                     // VkSamplerCreateFlags flags;
                VK_FILTER_NEAREST,                     // VkFilter magFilter;
                VK_FILTER_NEAREST,                     // VkFilter minFilter;
                VK_SAMPLER_MIPMAP_MODE_NEAREST,        // VkSamplerMipmapMode mipmapMode;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
                VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
                0.0f,                                  // float mipLodBias;
                VK_FALSE,                              // VkBool32 anisotropyEnable;
                1.0f,                                  // float maxAnisotropy;
                VK_FALSE,                              // VkBool32 compareEnable;
                VK_COMPARE_OP_ALWAYS,                  // VkCompareOp compareOp;
                0.0f,                                  // float minLod;
                0.0f,                                  // float maxLod;
                VK_BORDER_COLOR_INT_OPAQUE_BLACK,      // VkBorderColor borderColor;
                VK_FALSE                               // VkBool32 unnormalizedCoordinates;
            };

            Move<VkSampler> sampler(createSampler(vkdi, device, &samplerParams));
            inputSamplers.push_back(SamplerHandleSp(new SamplerHandleUp(sampler)));
        }

        // Create descriptor buffer and image infos
        switch (descType)
        {
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        {
            const VkDescriptorBufferInfo bufInfo = {
                **inputBuffers[bufferNdx++], // VkBuffer buffer;
                0,                           // VkDeviceSize offset;
                VK_WHOLE_SIZE,               // VkDeviceSize range;
            };

            descriptorInfos.push_back(bufInfo);
            break;
        }

        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        {
            const VkDescriptorImageInfo imgInfo = {
                DE_NULL,                  // VkSampler sampler;
                **inputImageViews.back(), // VkImageView imageView;
                VK_IMAGE_LAYOUT_GENERAL   // VkImageLayout imageLayout;
            };

            descriptorImageInfos.push_back(imgInfo);
            break;
        }

        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        {
            const VkDescriptorImageInfo imgInfo = {
                DE_NULL,                                 // VkSampler sampler;
                **inputImageViews.back(),                // VkImageView imageView;
                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
            };

            descriptorImageInfos.push_back(imgInfo);
            break;
        }

        case VK_DESCRIPTOR_TYPE_SAMPLER:
        {
            const VkDescriptorImageInfo imgInfo = {
                **inputSamplers.back(), // VkSampler sampler;
                DE_NULL,                // VkImageView imageView;
                VK_IMAGE_LAYOUT_GENERAL // VkImageLayout imageLayout;
            };

            descriptorImageInfos.push_back(imgInfo);
            break;
        }

        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        {
            const VkDescriptorImageInfo imgInfo = {
                **inputSamplers.back(),                  // VkSampler sampler;
                **inputImageViews.back(),                // VkImageView imageView;
                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
            };

            descriptorImageInfos.push_back(imgInfo);
            break;
        }

        default:
            DE_FATAL("Not implemented");
        }
    }

    for (uint32_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
    {
        DE_ASSERT(m_shaderSpec.outputs[outputNdx].getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

        descriptorTypes.push_back(m_shaderSpec.outputs[outputNdx].getDescriptorType());

        AllocationMp alloc;
        const BufferSp &output = m_shaderSpec.outputs[outputNdx].getBuffer();
        vector<uint8_t> outputBytes;

        output->getBytes(outputBytes);

        const size_t numBytes  = outputBytes.size();
        BufferHandleUp *buffer = new BufferHandleUp(
            createBufferAndBindMemory(m_context, vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc,
                                      m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));

        fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff, m_shaderSpec.coherentMemory);
        descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
        outputBuffers.push_back(BufferHandleSp(buffer));
        outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
    }

    std::vector<VkDeviceAddress> gpuAddrs;
    // Query the buffer device addresses, write them into a new buffer, and replace
    // all the descriptors with a single descriptor pointing at this new buffer.
    if (m_shaderSpec.usesPhysStorageBuffer)
    {
        VkBufferDeviceAddressInfo info{
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            DE_NULL,                                      // const void* pNext;
            0,                                            // VkBuffer buffer;
        };

        for (uint32_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
        {
            info.buffer          = **inputBuffers[inputNdx];
            VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

            gpuAddrs.push_back(addr);
        }
        for (uint32_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
        {
            info.buffer          = **outputBuffers[outputNdx];
            VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

            gpuAddrs.push_back(addr);
        }

        descriptorInfos.clear();
        descriptorTypes.clear();
        descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
        const size_t numBytes = gpuAddrs.size() * sizeof(VkDeviceAddress);

        AllocationMp bufferAlloc;
        BufferHandleUp *buffer = new BufferHandleUp(
            createBufferAndBindMemory(m_context, vkdi, device, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, numBytes,
                                      &bufferAlloc, false, m_shaderSpec.coherentMemory));

        setMemory(vkdi, device, &*bufferAlloc, numBytes, &gpuAddrs.front(), m_shaderSpec.coherentMemory);
        inputBuffers.push_back(BufferHandleSp(buffer));
        inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

        descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
    }
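
    // With usesPhysStorageBuffer the shader never sees the individual input and
    // output buffers through descriptors: it reads their 64-bit device addresses
    // out of the single storage buffer bound above and dereferences them via
    // SPIR-V PhysicalStorageBuffer pointers. This is why createBufferAndBindMemory()
    // sets VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT and requests
    // MemoryRequirement::DeviceAddress for such buffers.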

    // Create layouts and descriptor set.

    Unique<VkDescriptorSetLayout> descriptorSetLayout(createDescriptorSetLayout(vkdi, device, descriptorTypes));
    Unique<VkPipelineLayout> pipelineLayout(
        createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
    Unique<VkDescriptorPool> descriptorPool(createDescriptorPool(vkdi, device, descriptorTypes));
    Unique<VkDescriptorSet> descriptorSet(createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout,
                                                              descriptorTypes, descriptorInfos, descriptorImageInfos));

    // Create compute shader and pipeline.

    const ProgramBinary &binary = m_context.getBinaryCollection().get("compute");
    if (m_shaderSpec.verifyBinary && !m_shaderSpec.verifyBinary(binary))
    {
        return tcu::TestStatus::fail("Binary verification of SPIR-V in the test failed");
    }
    Unique<VkShaderModule> module(createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));

    Unique<VkPipeline> computePipeline(createComputePipeline(
        vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));

    // Record commands into the command buffer

    const tcu::IVec3 &numWorkGroups = m_shaderSpec.numWorkGroups;

    beginCommandBuffer(vkdi, *cmdBuffer);
    vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
    vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(),
                               0, DE_NULL);
    if (m_shaderSpec.pushConstants != DE_NULL)
    {
        vector<uint8_t> pushConstantsBytes;
        m_shaderSpec.pushConstants->getBytes(pushConstantsBytes);

        const uint32_t size = static_cast<uint32_t>(pushConstantsBytes.size());
        const void *data    = &pushConstantsBytes.front();

        vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0,
                              /* size = */ size, data);
    }
    vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());

    // Insert a barrier so data written by the shader is available to the host
    for (uint32_t outputBufferNdx = 0; outputBufferNdx < outputBuffers.size(); ++outputBufferNdx)
    {
        const VkBufferMemoryBarrier buf_barrier = {
            VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
            DE_NULL,                                 // const void* pNext;
            VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags srcAccessMask;
            VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags dstAccessMask;
            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t srcQueueFamilyIndex;
            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t dstQueueFamilyIndex;
            **outputBuffers[outputBufferNdx],        // VkBuffer buffer;
            0,                                       // VkDeviceSize offset;
            VK_WHOLE_SIZE                            // VkDeviceSize size;
        };

        vkdi.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0,
                                DE_NULL, 1, &buf_barrier, 0, DE_NULL);
    }
    endCommandBuffer(vkdi, *cmdBuffer);

    submitCommandsAndWait(vkdi, device, queue, *cmdBuffer);
    m_context.resetCommandPoolForVKSC(device, *cmdPool);

    // Invalidate output memory ranges before checking on host.
    for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
    {
        invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.coherentMemory);
    }

    // Check output.
    if (m_shaderSpec.verifyIO)
    {
        if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs,
                                      m_context.getTestContext().getLog()))
            return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
    }
    else
    {
        for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
        {
            const BufferSp &expectedOutput = m_shaderSpec.outputs[outputNdx].getBuffer();
            vector<uint8_t> expectedBytes;

            expectedOutput->getBytes(expectedBytes);

            if (deMemCmp(&expectedBytes.front(), outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size()))
            {
                const size_t errorsMax     = 16u;
                const uint8_t *ptrHost     = static_cast<uint8_t *>(outputAllocs[outputNdx]->getHostPtr());
                const uint8_t *ptrExpected = static_cast<uint8_t *>(&expectedBytes.front());
                size_t errors              = 0u;
                size_t ndx                 = 0u;

                // Skip the leading bytes that match.
                for (; ndx < expectedBytes.size(); ++ndx)
                {
                    if (ptrHost[ndx] != ptrExpected[ndx])
                        break;
                }

                // Log mismatching bytes, up to errorsMax entries.
                for (; ndx < expectedBytes.size(); ++ndx)
                {
                    if (ptrHost[ndx] != ptrExpected[ndx])
                    {
                        m_context.getTestContext().getLog()
                            << tcu::TestLog::Message << "OutputBuffer:" << outputNdx
                            << " got:" << ((uint32_t)ptrHost[ndx]) << " expected:" << ((uint32_t)ptrExpected[ndx])
                            << " at byte " << ndx << tcu::TestLog::EndMessage;
                        errors++;

                        if (errors >= errorsMax)
                        {
                            m_context.getTestContext().getLog()
                                << tcu::TestLog::Message << "Maximum error count reached (" << errors
                                << "). Stopping output." << tcu::TestLog::EndMessage;
                            break;
                        }
                    }
                }

                return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
            }
        }
    }

    return tcu::TestStatus::pass("Output matches expected");
}

} // namespace SpirVAssembly
} // namespace vkt