/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktImageAtomicOperationTests.cpp
 * \brief Image atomic operation tests
 *//*--------------------------------------------------------------------*/

#include "vktImageAtomicOperationTests.hpp"
#include "vktImageAtomicSpirvShaders.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSTLUtil.hpp"

#include "vktTestCaseUtil.hpp"
#include "vkPrograms.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vktImageTestsUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexture.hpp"
#include "tcuVectorType.hpp"
#include "tcuStringTemplate.hpp"

namespace vkt
{
namespace image
{
namespace
{

using namespace vk;
using namespace std;
using de::toString;

using tcu::ConstPixelBufferAccess;
using tcu::CubeFace;
using tcu::IVec2;
using tcu::IVec3;
using tcu::IVec4;
using tcu::PixelBufferAccess;
using tcu::TestContext;
using tcu::Texture1D;
using tcu::Texture2D;
using tcu::Texture2DArray;
using tcu::Texture3D;
using tcu::TextureCube;
using tcu::TextureFormat;
using tcu::UVec3;
using tcu::UVec4;
using tcu::Vec4;
using tcu::Vector;

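// Each texel is operated on by NUM_INVOCATIONS_PER_PIXEL compute invocations: the dispatch
// grid is widened in X and the shaders wrap the X coordinate (gx % width) back onto the image.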
enum
{
    NUM_INVOCATIONS_PER_PIXEL = 5u
};

enum AtomicOperation
{
    ATOMIC_OPERATION_ADD = 0,
    ATOMIC_OPERATION_SUB,
    ATOMIC_OPERATION_INC,
    ATOMIC_OPERATION_DEC,
    ATOMIC_OPERATION_MIN,
    ATOMIC_OPERATION_MAX,
    ATOMIC_OPERATION_AND,
    ATOMIC_OPERATION_OR,
    ATOMIC_OPERATION_XOR,
    ATOMIC_OPERATION_EXCHANGE,
    ATOMIC_OPERATION_COMPARE_EXCHANGE,

    ATOMIC_OPERATION_LAST
};

enum class ShaderReadType
{
    NORMAL = 0,
    SPARSE,
};

enum class ImageBackingType
{
    NORMAL = 0,
    SPARSE,
};

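//! Returns the GLSL texel coordinate expression ("x", "ivec2(x,y)" or "ivec3(x,y,z)") matching the image type.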
static string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y, const std::string &z)
{
    switch (imageType)
    {
    case IMAGE_TYPE_1D:
    case IMAGE_TYPE_BUFFER:
        return x;
    case IMAGE_TYPE_1D_ARRAY:
    case IMAGE_TYPE_2D:
        return string("ivec2(" + x + "," + y + ")");
    case IMAGE_TYPE_2D_ARRAY:
    case IMAGE_TYPE_3D:
    case IMAGE_TYPE_CUBE:
    case IMAGE_TYPE_CUBE_ARRAY:
        return string("ivec3(" + x + "," + y + "," + z + ")");
    default:
        DE_ASSERT(false);
        return "";
    }
}

static string getComponentTypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "int64_t" : "int");
    if (uintFormat)
        return (is64 ? "uint64_t" : "uint");
    if (floatFormat)
        return (is64 ? "double" : "float");

    return "";
}

static string getVec4TypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
{
    DE_ASSERT(intFormat || uintFormat || floatFormat);

    const bool is64 = (componentWidth == 64);

    if (intFormat)
        return (is64 ? "i64vec4" : "ivec4");
    if (uintFormat)
        return (is64 ? "u64vec4" : "uvec4");
    if (floatFormat)
        return (is64 ? "f64vec4" : "vec4");

    return "";
}

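//! Returns the GLSL expression used as the atomic operand. It is kept in sync with
//! getAtomicFuncArgument() below, which computes the same value on the host for verification.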
static string getAtomicFuncArgumentShaderStr(const AtomicOperation op, const string &x, const string &y,
                                             const string &z, const IVec3 &gridSize)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
    case ATOMIC_OPERATION_AND:
    case ATOMIC_OPERATION_OR:
    case ATOMIC_OPERATION_XOR:
        return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
    case ATOMIC_OPERATION_MIN:
    case ATOMIC_OPERATION_MAX:
        // multiply by (1 - 2*(value % 2)) to make half of the data negative;
        // for uint formats this generates large numbers instead
        return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
    case ATOMIC_OPERATION_EXCHANGE:
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y +
                      ")");
    default:
        DE_ASSERT(false);
        return "";
    }
}

static string getAtomicOperationCaseName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
        return string("add");
    case ATOMIC_OPERATION_SUB:
        return string("sub");
    case ATOMIC_OPERATION_INC:
        return string("inc");
    case ATOMIC_OPERATION_DEC:
        return string("dec");
    case ATOMIC_OPERATION_MIN:
        return string("min");
    case ATOMIC_OPERATION_MAX:
        return string("max");
    case ATOMIC_OPERATION_AND:
        return string("and");
    case ATOMIC_OPERATION_OR:
        return string("or");
    case ATOMIC_OPERATION_XOR:
        return string("xor");
    case ATOMIC_OPERATION_EXCHANGE:
        return string("exchange");
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("compare_exchange");
    default:
        DE_ASSERT(false);
        return "";
    }
}

static string getAtomicOperationShaderFuncName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_ADD:
        return string("imageAtomicAdd");
    case ATOMIC_OPERATION_MIN:
        return string("imageAtomicMin");
    case ATOMIC_OPERATION_MAX:
        return string("imageAtomicMax");
    case ATOMIC_OPERATION_AND:
        return string("imageAtomicAnd");
    case ATOMIC_OPERATION_OR:
        return string("imageAtomicOr");
    case ATOMIC_OPERATION_XOR:
        return string("imageAtomicXor");
    case ATOMIC_OPERATION_EXCHANGE:
        return string("imageAtomicExchange");
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return string("imageAtomicCompSwap");
    default:
        DE_ASSERT(false);
        return "";
    }
}

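// Initial image values are chosen per operation: large for SUB/DEC so the result cannot wrap
// below zero, an all-ones low-bit mask for MIN/AND so decreases and cleared bits are observable,
// and a small arbitrary nonzero value for the rest.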
template <typename T>
T getOperationInitialValue(const AtomicOperation op)
{
    switch (op)
    {
    // \note 18 is just an arbitrary small nonzero value.
    case ATOMIC_OPERATION_ADD:
        return 18;
    case ATOMIC_OPERATION_INC:
        return 18;
    case ATOMIC_OPERATION_SUB:
        return (1 << 24) - 1;
    case ATOMIC_OPERATION_DEC:
        return (1 << 24) - 1;
    case ATOMIC_OPERATION_MIN:
        return (1 << 15) - 1;
    case ATOMIC_OPERATION_MAX:
        return 18;
    case ATOMIC_OPERATION_AND:
        return (1 << 15) - 1;
    case ATOMIC_OPERATION_OR:
        return 18;
    case ATOMIC_OPERATION_XOR:
        return 18;
    case ATOMIC_OPERATION_EXCHANGE:
        return 18;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return 18;
    default:
        DE_ASSERT(false);
        return 0xFFFFFFFF;
    }
}

template <>
int64_t getOperationInitialValue<int64_t>(const AtomicOperation op)
{
    switch (op)
    {
    // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
    case ATOMIC_OPERATION_ADD:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_INC:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_SUB:
        return (1ull << 56) - 1;
    case ATOMIC_OPERATION_DEC:
        return (1ull << 56) - 1;
    case ATOMIC_OPERATION_MIN:
        return (1ull << 47) - 1;
    case ATOMIC_OPERATION_MAX:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_AND:
        return (1ull << 47) - 1;
    case ATOMIC_OPERATION_OR:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_XOR:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_EXCHANGE:
        return 0x000000BEFFFFFF18;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return 0x000000BEFFFFFF18;
    default:
        DE_ASSERT(false);
        return 0xFFFFFFFFFFFFFFFF;
    }
}

template <>
uint64_t getOperationInitialValue<uint64_t>(const AtomicOperation op)
{
    return (uint64_t)getOperationInitialValue<int64_t>(op);
}

template <typename T>
static T getAtomicFuncArgument(const AtomicOperation op, const IVec3 &invocationID, const IVec3 &gridSize)
{
    const T x = static_cast<T>(invocationID.x());
    const T y = static_cast<T>(invocationID.y());
    const T z = static_cast<T>(invocationID.z());

    switch (op)
    {
    // \note Fall-throughs.
    case ATOMIC_OPERATION_ADD:
    case ATOMIC_OPERATION_SUB:
    case ATOMIC_OPERATION_AND:
    case ATOMIC_OPERATION_OR:
    case ATOMIC_OPERATION_XOR:
        return x * x + y * y + z * z;
    case ATOMIC_OPERATION_INC:
    case ATOMIC_OPERATION_DEC:
        return 1;
    case ATOMIC_OPERATION_MIN:
    case ATOMIC_OPERATION_MAX:
        // multiply half of the data by -1
        return (1 - 2 * (x % 2)) * (x * x + y * y + z * z);
    case ATOMIC_OPERATION_EXCHANGE:
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return (z * static_cast<T>(gridSize.x()) + x) * static_cast<T>(gridSize.y()) + y;
    default:
        DE_ASSERT(false);
        return -1;
    }
}

//! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. it is both commutative and associative).
static bool isOrderIndependentAtomicOperation(const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_ADD || op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC ||
           op == ATOMIC_OPERATION_DEC || op == ATOMIC_OPERATION_MIN || op == ATOMIC_OPERATION_MAX ||
           op == ATOMIC_OPERATION_AND || op == ATOMIC_OPERATION_OR || op == ATOMIC_OPERATION_XOR;
}

//! Checks if the operation needs a SPIR-V shader.
static bool isSpirvAtomicOperation(const AtomicOperation op)
{
    return op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC || op == ATOMIC_OPERATION_DEC;
}

//! Returns the SPIR-V assembler name of the given operation.
static std::string getSpirvAtomicOpName(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_SUB:
        return "OpAtomicISub";
    case ATOMIC_OPERATION_INC:
        return "OpAtomicIIncrement";
    case ATOMIC_OPERATION_DEC:
        return "OpAtomicIDecrement";
    default:
        break;
    }

    DE_ASSERT(false);
    return "";
}

//! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
static bool isSpirvAtomicNoLastArgOp(const AtomicOperation op)
{
    switch (op)
    {
    case ATOMIC_OPERATION_SUB:
        return false;
    case ATOMIC_OPERATION_INC: // fallthrough
    case ATOMIC_OPERATION_DEC:
        return true;
    default:
        break;
    }

    DE_ASSERT(false);
    return false;
}

//! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
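// For COMPARE_EXCHANGE the compare value (18, or 0xBEFFFFFF18 for 64-bit types) equals the
// texel's initial value, so the first compare-exchange on a fresh texel is the one that swaps.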
template <typename T>
static T computeBinaryAtomicOperationResult(const AtomicOperation op, const T a, const T b)
{
    switch (op)
    {
    case ATOMIC_OPERATION_INC: // fallthrough.
    case ATOMIC_OPERATION_ADD:
        return a + b;
    case ATOMIC_OPERATION_DEC: // fallthrough.
    case ATOMIC_OPERATION_SUB:
        return a - b;
    case ATOMIC_OPERATION_MIN:
        return de::min(a, b);
    case ATOMIC_OPERATION_MAX:
        return de::max(a, b);
    case ATOMIC_OPERATION_AND:
        return a & b;
    case ATOMIC_OPERATION_OR:
        return a | b;
    case ATOMIC_OPERATION_XOR:
        return a ^ b;
    case ATOMIC_OPERATION_EXCHANGE:
        return b;
    case ATOMIC_OPERATION_COMPARE_EXCHANGE:
        return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
    default:
        DE_ASSERT(false);
        return -1;
    }
}

VkImageUsageFlags getUsageFlags(bool useTransfer)
{
    VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;

    if (useTransfer)
        usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

    return usageFlags;
}

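//! Registers the compute shaders used when transfer is not available: a fill shader that writes
//! initial data from a buffer into the image, a read shader that copies the image back to a buffer,
//! and (for sparse-capable image types) a residency variant cross-checking sparseImageLoadARB.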
void AddFillReadShader(SourceCollections &sourceCollections, const ImageType &imageType,
                       const tcu::TextureFormat &format, const string &componentType, const string &vec4Type)
{
    const string imageInCoord         = getCoordStr(imageType, "gx", "gy", "gz");
    const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
    const string shaderImageTypeStr   = getShaderImageType(format, imageType);
    const auto componentWidth         = getFormatComponentWidth(mapTextureFormat(format), 0u);
    const string extensions =
        ((componentWidth == 64u) ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                                   "#extension GL_EXT_shader_image_int64 : require\n" :
                                   "");

    const string fillShader =
        "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
        ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" +
        shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
        " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer inputBuffer\n"
        "{\n"
        "    " +
        componentType +
        " data[];\n"
        "} inBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    imageStore(u_resultImage, " +
        imageInCoord + ", " + vec4Type +
        "(inBuffer.data[index]));\n"
        "}\n";

    const string readShader =
        "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
        ";\n"
        "\n"
        "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        "layout (" +
        shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
        " u_resultImage;\n"
        "\n"
        "layout(std430, binding = 1) buffer outputBuffer\n"
        "{\n"
        "    " +
        componentType +
        " data[];\n"
        "} outBuffer;\n"
        "\n"
        "void main(void)\n"
        "{\n"
        "    int gx = int(gl_GlobalInvocationID.x);\n"
        "    int gy = int(gl_GlobalInvocationID.y);\n"
        "    int gz = int(gl_GlobalInvocationID.z);\n"
        "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
        "    outBuffer.data[index] = imageLoad(u_resultImage, " +
        imageInCoord +
        ").x;\n"
        "}\n";

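    // Sparse-residency read shaders are only generated for image types that can be sparse
    // (1D and buffer images are excluded).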
    if ((imageType != IMAGE_TYPE_1D) && (imageType != IMAGE_TYPE_1D_ARRAY) && (imageType != IMAGE_TYPE_BUFFER))
    {
        const string readShaderResidency =
            "#version 450\n"
            "#extension GL_ARB_sparse_texture2 : require\n" +
            extensions + "precision highp " + shaderImageTypeStr +
            ";\n"
            "\n"
            "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
            "layout (" +
            shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
            " u_resultImage;\n"
            "\n"
            "layout(std430, binding = 1) buffer outputBuffer\n"
            "{\n"
            "    " +
            componentType +
            " data[];\n"
            "} outBuffer;\n"
            "\n"
            "void main(void)\n"
            "{\n"
            "    int gx = int(gl_GlobalInvocationID.x);\n"
            "    int gy = int(gl_GlobalInvocationID.y);\n"
            "    int gz = int(gl_GlobalInvocationID.z);\n"
            "    uint index = gx + (gy * gl_NumWorkGroups.x) + (gz * gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
            "    outBuffer.data[index] = imageLoad(u_resultImage, " +
            imageInCoord +
            ").x;\n"
            "    " +
            vec4Type +
            " sparseValue;\n"
            "    sparseImageLoadARB(u_resultImage, " +
            imageInCoord +
            ", sparseValue);\n"
            "    if (outBuffer.data[index] != sparseValue.x)\n"
            "        outBuffer.data[index] = " +
            vec4Type +
            "(1234).x;\n"
            "}\n";

        sourceCollections.glslSources.add("readShaderResidency")
            << glu::ComputeSource(readShaderResidency.c_str())
            << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    }

    sourceCollections.glslSources.add("fillShader")
        << glu::ComputeSource(fillShader.c_str())
        << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
    sourceCollections.glslSources.add("readShader")
        << glu::ComputeSource(readShader.c_str())
        << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
}

//! Prepare the initial data for the image
static void initDataForImage(const VkDevice device, const DeviceInterface &deviceInterface, const TextureFormat &format,
                             const AtomicOperation operation, const tcu::UVec3 &gridSize, BufferWithMemory &buffer)
{
    Allocation &bufferAllocation = buffer.getAllocation();
    const VkFormat imageFormat   = mapTextureFormat(format);
    tcu::PixelBufferAccess pixelBuffer(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());

    if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
    {
        const int64_t initialValue(getOperationInitialValue<int64_t>(operation));

        for (uint32_t z = 0; z < gridSize.z(); z++)
            for (uint32_t y = 0; y < gridSize.y(); y++)
                for (uint32_t x = 0; x < gridSize.x(); x++)
                {
                    *((int64_t *)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
                }
    }
    else
    {
        const tcu::IVec4 initialValue(getOperationInitialValue<int32_t>(operation));

        for (uint32_t z = 0; z < gridSize.z(); z++)
            for (uint32_t y = 0; y < gridSize.y(); y++)
                for (uint32_t x = 0; x < gridSize.x(); x++)
                {
                    pixelBuffer.setPixel(initialValue, x, y, z);
                }
    }

    flushAlloc(deviceInterface, device, bufferAllocation);
}

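//! Throws NotSupportedError unless the requested format/tiling/image-type/operation combination
//! (including any sparse, transfer, 64-bit or float-atomic requirements) is supported.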
void commonCheckSupport(Context &context, const tcu::TextureFormat &tcuFormat, VkImageTiling tiling,
                        ImageType imageType, const tcu::UVec3 &imageSize, AtomicOperation operation, bool useTransfer,
                        ShaderReadType readType, ImageBackingType backingType)
{
    const VkFormat format       = mapTextureFormat(tcuFormat);
    const VkImageType vkImgType = mapImageType(imageType);
    const VkFormatFeatureFlags texelBufferSupport =
        (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);

    const auto &vki           = context.getInstanceInterface();
    const auto physicalDevice = context.getPhysicalDevice();
    const auto usageFlags     = getUsageFlags(useTransfer);

    VkImageFormatProperties vkImageFormatProperties;
    const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling,
                                                                   usageFlags, 0, &vkImageFormatProperties);
    if (result != VK_SUCCESS)
    {
        if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
            TCU_THROW(NotSupportedError, "Format unsupported for tiling");
        else
            TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
    }

    if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
    {
        TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
    }

    const VkFormatProperties formatProperties =
        getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
    if ((imageType == IMAGE_TYPE_BUFFER) &&
        ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
        TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");

    const VkFormatFeatureFlags requiredFeaturesLinear =
        (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
    if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
        ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
    {
        TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
    }

    if (imageType == IMAGE_TYPE_CUBE_ARRAY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);

#ifndef CTS_USES_VULKANSC
    if (backingType == ImageBackingType::SPARSE)
    {
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

        switch (vkImgType)
        {
        case VK_IMAGE_TYPE_2D:
            context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D);
            break;
        case VK_IMAGE_TYPE_3D:
            context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D);
            break;
        default:
            DE_ASSERT(false);
            break;
        }

        if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format,
                                           vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
            TCU_THROW(NotSupportedError, "Format does not support sparse images");
    }
#endif // CTS_USES_VULKANSC

    if (isFloatFormat(format))
    {
        context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");

        const VkFormatFeatureFlags requiredFeatures =
            (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto &atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();

        if (!atomicFloatFeatures.shaderImageFloat32Atomics)
            TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");

        if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
            TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");

        if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
        {
            context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
#ifndef CTS_USES_VULKANSC
            if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
            {
                TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
            }
#endif // CTS_USES_VULKANSC
        }

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Required format feature bits not supported");

        if (backingType == ImageBackingType::SPARSE)
        {
            if (!atomicFloatFeatures.sparseImageFloat32Atomics)
                TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");

            if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
                TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
        }
    }
    else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
    {
        context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

        const VkFormatFeatureFlags requiredFeatures =
            (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
        const auto &atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();

        if (!atomicInt64Features.shaderImageInt64Atomics)
            TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");

        if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
            TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");

        if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
            TCU_FAIL("Mandatory format features not supported");
    }

    if (useTransfer)
    {
        const VkFormatFeatureFlags transferFeatures =
            (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
        if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
            TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
    }

    if (readType == ShaderReadType::SPARSE)
    {
        DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
    }
}

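//! Test case that verifies only the final image contents after all atomic invocations complete.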
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
    BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
                              const tcu::UVec3 &imageSize, const tcu::TextureFormat &format, const VkImageTiling tiling,
                              const AtomicOperation operation, const bool useTransfer,
                              const ShaderReadType shaderReadType, const ImageBackingType backingType,
                              const glu::GLSLVersion glslVersion);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const tcu::TextureFormat m_format;
    const VkImageTiling m_tiling;
    const AtomicOperation m_operation;
    const bool m_useTransfer;
    const ShaderReadType m_readType;
    const ImageBackingType m_backingType;
    const glu::GLSLVersion m_glslVersion;
};

BinaryAtomicEndResultCase::BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name,
                                                     const ImageType imageType, const tcu::UVec3 &imageSize,
                                                     const tcu::TextureFormat &format, const VkImageTiling tiling,
                                                     const AtomicOperation operation, const bool useTransfer,
                                                     const ShaderReadType shaderReadType,
                                                     const ImageBackingType backingType,
                                                     const glu::GLSLVersion glslVersion)
    : TestCase(testCtx, name)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_tiling(tiling)
    , m_operation(operation)
    , m_useTransfer(useTransfer)
    , m_readType(shaderReadType)
    , m_backingType(backingType)
    , m_glslVersion(glslVersion)
{
}

void BinaryAtomicEndResultCase::checkSupport(Context &context) const
{
    commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
                       m_backingType);
}

void BinaryAtomicEndResultCase::initPrograms(SourceCollections &sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool intFormat          = isIntFormat(imageFormat);
    const bool uintFormat         = isUintFormat(imageFormat);
    const bool floatFormat        = isFloatFormat(imageFormat);
    const string type             = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string vec4Type         = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    if (!m_useTransfer)
    {
        AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
    }

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
        const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

        const UVec3 gridSize     = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

        const string atomicArgExpr =
            type +
            getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
                                           IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));

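        // For imageAtomicCompSwap the compare value must equal the texel's initial value:
        // 18, or 820338753304 (== 0x000000BEFFFFFF18) for 64-bit formats, with matching
        // "u"/"l" literal suffixes.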
        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
                                              (componentWidth == 64 ? ", 820338753304" : ", 18") +
                                                  string(uintFormat ? "u" : "") +
                                                  string(componentWidth == 64 ? "l" : "") :
                                              "";
        const string atomicInvocation   = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
                                        atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions + "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr +
                  ";\n"
                  "\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
                  " u_resultImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    " +
                  atomicInvocation +
                  ";\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

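//! Variant that additionally stores the value returned by each atomic call into a second image,
//! so per-invocation intermediate values can be verified.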
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
    BinaryAtomicIntermValuesCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
                                 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format,
                                 const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
                                 const ShaderReadType shaderReadType, const ImageBackingType backingType,
                                 const glu::GLSLVersion glslVersion);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const ImageType m_imageType;
    const tcu::UVec3 m_imageSize;
    const tcu::TextureFormat m_format;
    const VkImageTiling m_tiling;
    const AtomicOperation m_operation;
    const bool m_useTransfer;
    const ShaderReadType m_readType;
    const ImageBackingType m_backingType;
    const glu::GLSLVersion m_glslVersion;
};

BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase(
    TestContext &testCtx, const string &name, const ImageType imageType, const tcu::UVec3 &imageSize,
    const TextureFormat &format, const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
    const ShaderReadType shaderReadType, const ImageBackingType backingType, const glu::GLSLVersion glslVersion)
    : TestCase(testCtx, name)
    , m_imageType(imageType)
    , m_imageSize(imageSize)
    , m_format(format)
    , m_tiling(tiling)
    , m_operation(operation)
    , m_useTransfer(useTransfer)
    , m_readType(shaderReadType)
    , m_backingType(backingType)
    , m_glslVersion(glslVersion)
{
}

void BinaryAtomicIntermValuesCase::checkSupport(Context &context) const
{
    commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
                       m_backingType);
}

void BinaryAtomicIntermValuesCase::initPrograms(SourceCollections &sourceCollections) const
{
    const VkFormat imageFormat    = mapTextureFormat(m_format);
    const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
    const bool intFormat          = isIntFormat(imageFormat);
    const bool uintFormat         = isUintFormat(imageFormat);
    const bool floatFormat        = isFloatFormat(imageFormat);
    const string type             = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
    const string vec4Type         = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);

    if (!m_useTransfer)
    {
        AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
    }

    if (isSpirvAtomicOperation(m_operation))
    {
        const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type,
                                      CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
        const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
        std::map<std::string, std::string> specializations;

        specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
        if (isSpirvAtomicNoLastArgOp(m_operation))
            specializations["LASTARG"] = "";

        sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
    }
    else
    {
        const string versionDecl     = glu::getGLSLVersionDeclaration(m_glslVersion);
        const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
        const string atomicCoord     = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
        const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
        const string atomicArgExpr =
            type +
            getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
                                           IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));

        const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
                                              (componentWidth == 64 ? ", 820338753304" : ", 18") +
                                                  string(uintFormat ? "u" : "") +
                                                  string(componentWidth == 64 ? "l" : "") :
                                              "";
        const string atomicInvocation   = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
                                        atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
        const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
        const string shaderImageTypeStr   = getShaderImageType(m_format, m_imageType);
        const string extensions           = "#extension GL_EXT_shader_atomic_float : enable\n"
                                            "#extension GL_EXT_shader_atomic_float2 : enable\n"
                                            "#extension GL_KHR_memory_scope_semantics : enable";

        string source = versionDecl + "\n" + extensions +
                        "\n"
                        "\n";

        if (64 == componentWidth)
        {
            source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
                      "#extension GL_EXT_shader_image_int64 : require\n";
        }

        source += "precision highp " + shaderImageTypeStr +
                  ";\n"
                  "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
                  " u_resultImage;\n"
                  "layout (" +
                  shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr +
                  " u_intermValuesImage;\n"
                  "\n"
                  "void main (void)\n"
                  "{\n"
                  "    int gx = int(gl_GlobalInvocationID.x);\n"
                  "    int gy = int(gl_GlobalInvocationID.y);\n"
                  "    int gz = int(gl_GlobalInvocationID.z);\n"
                  "    imageStore(u_intermValuesImage, " +
                  invocationCoord + ", " + vec4Type + "(" + atomicInvocation +
                  "));\n"
                  "}\n";

        sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
    }
}

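//! Base instance: creates the image or texel buffer, uploads the initial data, runs the atomic
//! compute pass, copies the results into a host-visible buffer and defers verification to subclasses.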
1009 class BinaryAtomicInstanceBase : public vkt::TestInstance
1010 {
1011 public:
1012     BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1013                              const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1014                              const AtomicOperation operation, const bool useTransfer,
1015                              const ShaderReadType shaderReadType, const ImageBackingType backingType);
1016 
1017     tcu::TestStatus iterate(void);
1018 
1019     virtual uint32_t getOutputBufferSize(void) const = 0;
1020 
1021     virtual void prepareResources(const bool useTransfer)     = 0;
1022     virtual void prepareDescriptors(const bool isTexelBuffer) = 0;
1023 
1024     virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const            = 0;
1025     virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1026                                       const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1027                                       const VkDeviceSize &range, const bool useTransfer) = 0;
1028 
1029     virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const = 0;
1030 
1031 protected:
1032     void shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer, const VkPipeline pipeline,
1033                          const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1034                          const VkDeviceSize &range, const tcu::UVec3 &gridSize);
1035 
1036     void createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExent, bool useTransfer,
1037                             de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr);
1038 
1039     void createImageResources(const VkFormat &imageFormat, const bool useTransfer);
1040 
1041     const string m_name;
1042     const ImageType m_imageType;
1043     const tcu::UVec3 m_imageSize;
1044     const TextureFormat m_format;
1045     const VkImageTiling m_tiling;
1046     const AtomicOperation m_operation;
1047     const bool m_useTransfer;
1048     const ShaderReadType m_readType;
1049     const ImageBackingType m_backingType;
1050 
1051     de::MovePtr<BufferWithMemory> m_inputBuffer;
1052     de::MovePtr<BufferWithMemory> m_outputBuffer;
1053     Move<VkBufferView> m_descResultBufferView;
1054     Move<VkBufferView> m_descIntermResultsBufferView;
1055     Move<VkDescriptorPool> m_descriptorPool;
1056     Move<VkDescriptorSetLayout> m_descriptorSetLayout;
1057     Move<VkDescriptorSet> m_descriptorSet;
1058 
1059     Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
1060     Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
1061 
1062     de::MovePtr<Image> m_resultImage;
1063     Move<VkImageView> m_resultImageView;
1064 
1065     std::vector<VkSemaphore> m_waitSemaphores;
1066 };
1067 
BinaryAtomicInstanceBase(Context & context,const string & name,const ImageType imageType,const tcu::UVec3 & imageSize,const TextureFormat & format,const VkImageTiling tiling,const AtomicOperation operation,const bool useTransfer,const ShaderReadType shaderReadType,const ImageBackingType backingType)1068 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1069                                                    const tcu::UVec3 &imageSize, const TextureFormat &format,
1070                                                    const VkImageTiling tiling, const AtomicOperation operation,
1071                                                    const bool useTransfer, const ShaderReadType shaderReadType,
1072                                                    const ImageBackingType backingType)
1073     : vkt::TestInstance(context)
1074     , m_name(name)
1075     , m_imageType(imageType)
1076     , m_imageSize(imageSize)
1077     , m_format(format)
1078     , m_tiling(tiling)
1079     , m_operation(operation)
1080     , m_useTransfer(useTransfer)
1081     , m_readType(shaderReadType)
1082     , m_backingType(backingType)
1083 {
1084 }
1085 
iterate(void)1086 tcu::TestStatus BinaryAtomicInstanceBase::iterate(void)
1087 {
1088     const VkDevice device                  = m_context.getDevice();
1089     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1090     const VkQueue queue                    = m_context.getUniversalQueue();
1091     const uint32_t queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1092     Allocator &allocator                   = m_context.getDefaultAllocator();
1093     const VkDeviceSize imageSizeInBytes    = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1094     const VkDeviceSize outBuffSizeInBytes  = getOutputBufferSize();
1095     const VkFormat imageFormat             = mapTextureFormat(m_format);
1096     const bool isTexelBuffer               = (m_imageType == IMAGE_TYPE_BUFFER);
1097 
1098     if (!isTexelBuffer)
1099     {
1100         createImageResources(imageFormat, m_useTransfer);
1101     }
1102 
1103     tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1104 
1105     //Prepare the buffer with the initial data for the image
1106     m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1107         deviceInterface, device, allocator,
1108         makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1109                                                    (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1110                                                                     static_cast<VkBufferUsageFlagBits>(0u))),
1111         MemoryRequirement::HostVisible));
1112 
1113     // Fill in buffer with initial data used for image.
1114     initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1115 
1116     // Create a buffer to store shader output copied from result image
1117     m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1118         deviceInterface, device, allocator,
1119         makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1120                                                      (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1121                                                                       static_cast<VkBufferUsageFlagBits>(0u))),
1122         MemoryRequirement::HostVisible));
1123 
1124     if (!isTexelBuffer)
1125     {
1126         prepareResources(m_useTransfer);
1127     }
1128 
1129     prepareDescriptors(isTexelBuffer);
1130 
1131     Move<VkDescriptorSet> descriptorSetFillImage;
1132     Move<VkShaderModule> shaderModuleFillImage;
1133     Move<VkPipelineLayout> pipelineLayoutFillImage;
1134     Move<VkPipeline> pipelineFillImage;
1135 
1136     Move<VkDescriptorSet> descriptorSetReadImage;
1137     Move<VkShaderModule> shaderModuleReadImage;
1138     Move<VkPipelineLayout> pipelineLayoutReadImage;
1139     Move<VkPipeline> pipelineReadImage;
1140 
1141     if (!m_useTransfer)
1142     {
1143         m_descriptorSetLayoutNoTransfer =
1144             DescriptorSetLayoutBuilder()
1145                 .addSingleBinding(
1146                     (isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1147                     VK_SHADER_STAGE_COMPUTE_BIT)
1148                 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1149                 .build(deviceInterface, device);
1150 
1151         m_descriptorPoolNoTransfer =
1152             DescriptorPoolBuilder()
1153                 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1154                          2)
1155                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1156                 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1157 
1158         descriptorSetFillImage =
1159             makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1160 
1161         descriptorSetReadImage =
1162             makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1163 
1164         shaderModuleFillImage =
1165             createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1166         pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1167         pipelineFillImage =
1168             makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1169 
1170         if (m_readType == ShaderReadType::SPARSE)
1171         {
1172             shaderModuleReadImage = createShaderModule(deviceInterface, device,
1173                                                        m_context.getBinaryCollection().get("readShaderResidency"), 0);
1174         }
1175         else
1176         {
1177             shaderModuleReadImage =
1178                 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1179         }
1180         pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1181         pipelineReadImage =
1182             makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1183     }
1184 
1185     // Create pipeline
1186     const Unique<VkShaderModule> shaderModule(
1187         createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1188     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1189     const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1190 
1191     // Create command buffer
1192     const Unique<VkCommandPool> cmdPool(
1193         createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1194     const Unique<VkCommandBuffer> cmdBuffer(
1195         allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1196 
1197     beginCommandBuffer(deviceInterface, *cmdBuffer);
1198 
1199     if (!isTexelBuffer)
1200     {
1201         if (m_useTransfer)
1202         {
1203             const vector<VkBufferImageCopy> bufferImageCopy(
1204                 1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)),
1205                                        getNumLayers(m_imageType, m_imageSize)));
1206             copyBufferToImage(deviceInterface, *cmdBuffer, *(*m_inputBuffer), imageSizeInBytes, bufferImageCopy,
1207                               VK_IMAGE_ASPECT_COLOR_BIT, 1, getNumLayers(m_imageType, m_imageSize),
1208                               m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1209         }
1210         else
1211         {
1212             shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage,
1213                             *descriptorSetFillImage, imageSizeInBytes, gridSize);
1214         }
1215         commandsBeforeCompute(*cmdBuffer);
1216     }
1217 
1218     deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1219     deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
1220                                           &m_descriptorSet.get(), 0u, DE_NULL);
1221 
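    // The grid is extended NUM_INVOCATIONS_PER_PIXEL times along X, so several
    // invocations perform an atomic operation on the same pixel.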
1222     deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1223 
1224     commandsAfterCompute(*cmdBuffer, *pipelineReadImage, *pipelineLayoutReadImage, *descriptorSetReadImage,
1225                          outBuffSizeInBytes, m_useTransfer);
1226 
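    // Make the output buffer writes visible to the host before it is mapped and verified.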
1227     const VkBufferMemoryBarrier outputBufferPreHostReadBarrier = makeBufferMemoryBarrier(
1228         ((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1229         VK_ACCESS_HOST_READ_BIT, m_outputBuffer->get(), 0ull, outBuffSizeInBytes);
1230 
1231     deviceInterface.cmdPipelineBarrier(
1232         *cmdBuffer,
1233         ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1234         VK_PIPELINE_STAGE_HOST_BIT, false, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1235 
1236     endCommandBuffer(deviceInterface, *cmdBuffer);
1237 
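    // Sparse images bind their memory on the sparse queue; wait on the binding
    // semaphores (if any) before the submitted commands execute.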
1238     std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1239     submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1240                           static_cast<uint32_t>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores),
1241                           de::dataOrNull(waitStages));
1242 
1243     Allocation &outputBufferAllocation = m_outputBuffer->getAllocation();
1244 
1245     invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1246 
1247     if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1248         return tcu::TestStatus::pass("Comparison succeeded");
1249     else
1250         return tcu::TestStatus::fail("Comparison failed");
1251 }
1252 
1253 void BinaryAtomicInstanceBase::shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer,
1254                                                const VkPipeline pipeline, const VkPipelineLayout pipelineLayout,
1255                                                const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1256                                                const tcu::UVec3 &gridSize)
1257 {
1258     const VkDevice device                  = m_context.getDevice();
1259     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1260     const VkDescriptorImageInfo descResultImageInfo =
1261         makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1262     const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1263     const VkImageSubresourceRange subresourceRange =
1264         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1265 
1266     DescriptorSetUpdateBuilder()
1267         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1268                      &descResultImageInfo)
1269         .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1270                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1271         .update(deviceInterface, device);
1272 
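    // Transition the result image from UNDEFINED to GENERAL so the fill shader can write to it.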
1273     const VkImageMemoryBarrier imageBarrierPre =
1274         makeImageMemoryBarrier(0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1275                                m_resultImage->get(), subresourceRange);
1276 
1277     deviceInterface.cmdPipelineBarrier(
1278         cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1279         (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPre);
1280 
1281     deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1282     deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1283                                           &descriptorSet, 0u, DE_NULL);
1284 
1285     deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1286 
1287     const VkImageMemoryBarrier imageBarrierPost =
1288         makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1289                                VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1290 
1291     deviceInterface.cmdPipelineBarrier(
1292         cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1293         (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPost);
1294 }
1295 
1296 void BinaryAtomicInstanceBase::createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1297                                                   de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr)
1298 {
1299     const VkDevice device                  = m_context.getDevice();
1300     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1301     Allocator &allocator                   = m_context.getDefaultAllocator();
1302     const VkImageUsageFlags usageFlags     = getUsageFlags(useTransfer);
1303     VkImageCreateFlags createFlags         = 0u;
1304 
1305     if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1306         createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1307 
1308     const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1309 
1310     VkImageCreateInfo createInfo = {
1311         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1312         DE_NULL,                             // const void* pNext;
1313         createFlags,                         // VkImageCreateFlags flags;
1314         mapImageType(m_imageType),           // VkImageType imageType;
1315         imageFormat,                         // VkFormat format;
1316         makeExtent3D(imageExtent),           // VkExtent3D extent;
1317         1u,                                  // uint32_t mipLevels;
1318         numLayers,                           // uint32_t arrayLayers;
1319         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1320         m_tiling,                            // VkImageTiling tiling;
1321         usageFlags,                          // VkImageUsageFlags usage;
1322         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1323         0u,                                  // uint32_t queueFamilyIndexCount;
1324         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
1325         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1326     };
1327 
1328 #ifndef CTS_USES_VULKANSC
1329     if (m_backingType == ImageBackingType::SPARSE)
1330     {
1331         const auto &vki               = m_context.getInstanceInterface();
1332         const auto physicalDevice     = m_context.getPhysicalDevice();
1333         const auto sparseQueue        = m_context.getSparseQueue();
1334         const auto sparseQueueIdx     = m_context.getSparseQueueFamilyIndex();
1335         const auto universalQIdx      = m_context.getUniversalQueueFamilyIndex();
1336         const uint32_t queueIndices[] = {universalQIdx, sparseQueueIdx};
1337 
1338         createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1339 
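        // If sparse binding uses a different queue family, the image must be shared
        // between the sparse and universal queues.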
1340         if (sparseQueueIdx != universalQIdx)
1341         {
1342             createInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
1343             createInfo.queueFamilyIndexCount = static_cast<uint32_t>(DE_LENGTH_OF_ARRAY(queueIndices));
1344             createInfo.pQueueFamilyIndices   = queueIndices;
1345         }
1346 
1347         const auto sparseImage =
1348             new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1349         m_waitSemaphores.push_back(sparseImage->getSemaphore());
1350         imagePtr = de::MovePtr<Image>(sparseImage);
1351     }
1352     else
1353 #endif // CTS_USES_VULKANSC
1354         imagePtr =
1355             de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1356 
1357     const VkImageSubresourceRange subresourceRange =
1358         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1359 
1360     imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat,
1361                                  subresourceRange);
1362 }
1363 
1364 void BinaryAtomicInstanceBase::createImageResources(const VkFormat &imageFormat, const bool useTransfer)
1365 {
1366     // Create the image that will store the results of the atomic operations.
1367     createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage,
1368                        m_resultImageView);
1369 }
1370 
1371 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1372 {
1373 public:
1374     BinaryAtomicEndResultInstance(Context &context, const string &name, const ImageType imageType,
1375                                   const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1376                                   const AtomicOperation operation, const bool useTransfer,
1377                                   const ShaderReadType shaderReadType, const ImageBackingType backingType)
1378         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1379                                    shaderReadType, backingType)
1380     {
1381     }
1382 
1383     virtual uint32_t getOutputBufferSize(void) const;
1384 
1385     virtual void prepareResources(const bool useTransfer)
1386     {
1387         DE_UNREF(useTransfer);
1388     }
1389     virtual void prepareDescriptors(const bool isTexelBuffer);
1390 
1391     virtual void commandsBeforeCompute(const VkCommandBuffer) const
1392     {
1393     }
1394     virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1395                                       const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1396                                       const VkDeviceSize &range, const bool useTransfer);
1397 
1398     virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1399 
1400 protected:
1401     template <typename T>
1402     bool isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z, const UVec3 &gridSize,
1403                         const IVec3 extendedGridSize) const;
1404 };
1405 
1406 uint32_t BinaryAtomicEndResultInstance::getOutputBufferSize(void) const
1407 {
1408     return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1409 }
1410 
1411 void BinaryAtomicEndResultInstance::prepareDescriptors(const bool isTexelBuffer)
1412 {
1413     const VkDescriptorType descriptorType =
1414         isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1415     const VkDevice device                  = m_context.getDevice();
1416     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1417 
1418     m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1419                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1420                                 .build(deviceInterface, device);
1421 
1422     m_descriptorPool = DescriptorPoolBuilder()
1423                            .addType(descriptorType)
1424                            .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1425 
1426     m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1427 
1428     if (isTexelBuffer)
1429     {
1430         m_descResultBufferView =
1431             makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1432 
1433         DescriptorSetUpdateBuilder()
1434             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1435                          &(m_descResultBufferView.get()))
1436             .update(deviceInterface, device);
1437     }
1438     else
1439     {
1440         const VkDescriptorImageInfo descResultImageInfo =
1441             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1442 
1443         DescriptorSetUpdateBuilder()
1444             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1445                          &descResultImageInfo)
1446             .update(deviceInterface, device);
1447     }
1448 }
1449 
1450 void BinaryAtomicEndResultInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1451                                                          const VkPipelineLayout pipelineLayout,
1452                                                          const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1453                                                          const bool useTransfer)
1454 {
1455     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1456     const VkImageSubresourceRange subresourceRange =
1457         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1458     const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1459 
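    // Read back the end results: texel buffers already hold them in the input buffer,
    // the transfer path copies the image to the output buffer, and the no-transfer
    // path reads the image into the buffer with a compute shader.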
1460     if (m_imageType == IMAGE_TYPE_BUFFER)
1461     {
1462         m_outputBuffer = m_inputBuffer;
1463     }
1464     else if (useTransfer)
1465     {
1466         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1467             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1468                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_resultImage->get(), subresourceRange);
1469 
1470         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1471                                            VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1472                                            &resultImagePostDispatchBarrier);
1473 
1474         const VkBufferImageCopy bufferImageCopyParams =
1475             makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1476 
1477         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1478                                              m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1479     }
1480     else
1481     {
1482         const VkDevice device = m_context.getDevice();
1483         const VkDescriptorImageInfo descResultImageInfo =
1484             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1485         const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1486 
1487         DescriptorSetUpdateBuilder()
1488             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1489                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1490             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1491                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1492             .update(deviceInterface, device);
1493 
1494         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1495             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1496                                    VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1497 
1498         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1499                                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1500                                            &resultImagePostDispatchBarrier);
1501 
1502         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1503         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1504                                               &descriptorSet, 0u, DE_NULL);
1505 
1506         switch (m_imageType)
1507         {
1508         case IMAGE_TYPE_1D_ARRAY:
1509             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1510             break;
1511         case IMAGE_TYPE_2D_ARRAY:
1512         case IMAGE_TYPE_CUBE:
1513         case IMAGE_TYPE_CUBE_ARRAY:
1514             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1515             break;
1516         default:
1517             deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1518             break;
1519         }
1520     }
1521 }
1522 
1523 bool BinaryAtomicEndResultInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1524 {
1525     const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
1526     const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1527 
1528     tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(),
1529                                              outputBufferAllocation.getHostPtr());
1530 
1531     for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1532         for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1533             for (int32_t x = 0; x < resultBuffer.getWidth(); x++)
1534             {
1535                 const void *resultValue = resultBuffer.getPixelPtr(x, y, z);
1536                 int32_t floatToIntValue = 0;
1537                 bool isFloatValue       = false;
1538                 if (isFloatFormat(mapTextureFormat(m_format)))
1539                 {
1540                     isFloatValue    = true;
1541                     floatToIntValue = static_cast<int32_t>(*((float *)resultValue));
1542                 }
1543 
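                // Order-independent operations must produce exactly the reference value;
                // (compare-)exchange is instead checked against the set of per-invocation arguments.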
1544                 if (isOrderIndependentAtomicOperation(m_operation))
1545                 {
1546                     if (isUintFormat(mapTextureFormat(m_format)))
1547                     {
1548                         if (is64Bit)
1549                         {
1550                             if (!isValueCorrect<uint64_t>(*((uint64_t *)resultValue), x, y, z, gridSize,
1551                                                           extendedGridSize))
1552                                 return false;
1553                         }
1554                         else
1555                         {
1556                             if (!isValueCorrect<uint32_t>(*((uint32_t *)resultValue), x, y, z, gridSize,
1557                                                           extendedGridSize))
1558                                 return false;
1559                         }
1560                     }
1561                     else if (isIntFormat(mapTextureFormat(m_format)))
1562                     {
1563                         if (is64Bit)
1564                         {
1565                             if (!isValueCorrect<int64_t>(*((int64_t *)resultValue), x, y, z, gridSize,
1566                                                          extendedGridSize))
1567                                 return false;
1568                         }
1569                         else
1570                         {
1571                             if (!isValueCorrect<int32_t>(*((int32_t *)resultValue), x, y, z, gridSize,
1572                                                          extendedGridSize))
1573                                 return false;
1574                         }
1575                     }
1576                     else
1577                     {
1578                         // 32-bit floating point
1579                         if (!isValueCorrect<int32_t>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1580                             return false;
1581                     }
1582                 }
1583                 else if (m_operation == ATOMIC_OPERATION_EXCHANGE || m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1584                 {
1585                     // Check if the end result equals one of the atomic args.
1586                     bool matchFound = false;
1587 
1588                     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1589                     {
1590                         const IVec3 gid(x + i * gridSize.x(), y, z);
1591                         matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1592                                                 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1593                                      isFloatValue ?
1594                                                floatToIntValue ==
1595                                                    getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1596                                                (*((int32_t *)resultValue) ==
1597                                                 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1598                     }
1599 
1600                     if (!matchFound)
1601                         return false;
1602                 }
1623                 else
1624                     DE_ASSERT(false);
1625             }
1626     return true;
1627 }
1628 
1629 template <typename T>
1630 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z,
1631                                                    const UVec3 &gridSize, const IVec3 extendedGridSize) const
1632 {
1633     T reference = getOperationInitialValue<T>(m_operation);
1634     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1635     {
1636         const IVec3 gid(x + i * gridSize.x(), y, z);
1637         T arg     = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1638         reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1639     }
1640     return (resultValue == reference);
1641 }
1642 
1643 TestInstance *BinaryAtomicEndResultCase::createInstance(Context &context) const
1644 {
1645     return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation,
1646                                              m_useTransfer, m_readType, m_backingType);
1647 }
1648 
1649 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1650 {
1651 public:
1652     BinaryAtomicIntermValuesInstance(Context &context, const string &name, const ImageType imageType,
1653                                      const tcu::UVec3 &imageSize, const TextureFormat &format,
1654                                      const VkImageTiling tiling, const AtomicOperation operation,
1655                                      const bool useTransfer, const ShaderReadType shaderReadType,
1656                                      const ImageBackingType backingType)
1657         : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1658                                    shaderReadType, backingType)
1659     {
1660     }
1661 
1662     virtual uint32_t getOutputBufferSize(void) const;
1663 
1664     virtual void prepareResources(const bool useTransfer);
1665     virtual void prepareDescriptors(const bool isTexelBuffer);
1666 
1667     virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const;
1668     virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1669                                       const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1670                                       const VkDeviceSize &range, const bool useTransfer);
1671 
1672     virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1673 
1674 protected:
1675     template <typename T>
1676     bool areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer, const bool isFloatingPoint, int32_t x, int32_t y,
1677                           int32_t z, const UVec3 &gridSize, const IVec3 extendedGridSize) const;
1678 
1679     template <typename T>
1680     bool verifyRecursive(const int32_t index, const T valueSoFar, bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1681                          const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1682                          const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1683     de::MovePtr<Image> m_intermResultsImage;
1684     Move<VkImageView> m_intermResultsImageView;
1685 };
1686 
1687 uint32_t BinaryAtomicIntermValuesInstance::getOutputBufferSize(void) const
1688 {
1689     return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1690 }
1691 
1692 void BinaryAtomicIntermValuesInstance::prepareResources(const bool useTransfer)
1693 {
1694     const UVec3 layerSize       = getLayerSize(m_imageType, m_imageSize);
1695     const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
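    // Cube-compatible images must keep width == height, so cube(-array) types extend
    // both X and Y by the invocation count; other types only need the extra slots along X.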
1696     const UVec3 extendedLayerSize =
1697         isCubeBasedImage ?
1698             UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z()) :
1699             UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1700 
1701     createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage,
1702                        m_intermResultsImageView);
1703 }
1704 
1705 void BinaryAtomicIntermValuesInstance::prepareDescriptors(const bool isTexelBuffer)
1706 {
1707     const VkDescriptorType descriptorType =
1708         isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1709 
1710     const VkDevice device                  = m_context.getDevice();
1711     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1712 
1713     m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1714                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1715                                 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1716                                 .build(deviceInterface, device);
1717 
1718     m_descriptorPool = DescriptorPoolBuilder()
1719                            .addType(descriptorType, 2u)
1720                            .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1721 
1722     m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1723 
1724     if (isTexelBuffer)
1725     {
1726         m_descResultBufferView =
1727             makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1728         m_descIntermResultsBufferView =
1729             makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1730 
1731         DescriptorSetUpdateBuilder()
1732             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1733                          &(m_descResultBufferView.get()))
1734             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1735                          &(m_descIntermResultsBufferView.get()))
1736             .update(deviceInterface, device);
1737     }
1738     else
1739     {
1740         const VkDescriptorImageInfo descResultImageInfo =
1741             makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1742         const VkDescriptorImageInfo descIntermResultsImageInfo =
1743             makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1744 
1745         DescriptorSetUpdateBuilder()
1746             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1747                          &descResultImageInfo)
1748             .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1749                          &descIntermResultsImageInfo)
1750             .update(deviceInterface, device);
1751     }
1752 }
1753 
1754 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const
1755 {
1756     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1757     const VkImageSubresourceRange subresourceRange =
1758         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1759 
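    // Transition the intermediate-results image to GENERAL before the atomic dispatch writes to it.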
1760     const VkImageMemoryBarrier imagePreDispatchBarrier =
1761         makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1762                                m_intermResultsImage->get(), subresourceRange);
1763 
1764     deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1765                                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1766                                        &imagePreDispatchBarrier);
1767 }
1768 
1769 void BinaryAtomicIntermValuesInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1770                                                             const VkPipelineLayout pipelineLayout,
1771                                                             const VkDescriptorSet descriptorSet,
1772                                                             const VkDeviceSize &range, const bool useTransfer)
1773 {
1774     // Nothing to do for texel buffers: the intermediate values were written directly to the output buffer.
1775     if (m_imageType == IMAGE_TYPE_BUFFER)
1776         return;
1777 
1778     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1779     const VkImageSubresourceRange subresourceRange =
1780         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1781     const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1782 
1783     if (useTransfer)
1784     {
1785         const VkImageMemoryBarrier imagePostDispatchBarrier =
1786             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1787                                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_intermResultsImage->get(), subresourceRange);
1788 
1789         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1790                                            VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1791                                            &imagePostDispatchBarrier);
1792 
1793         const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1794         const VkBufferImageCopy bufferImageCopyParams =
1795             makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1796 
1797         deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(),
1798                                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u,
1799                                              &bufferImageCopyParams);
1800     }
1801     else
1802     {
1803         const VkDevice device = m_context.getDevice();
1804         const VkDescriptorImageInfo descResultImageInfo =
1805             makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1806         const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1807 
1808         DescriptorSetUpdateBuilder()
1809             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1810                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1811             .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1812                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1813             .update(deviceInterface, device);
1814 
1815         const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1816             makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1817                                    VK_IMAGE_LAYOUT_GENERAL, m_intermResultsImage->get(), subresourceRange);
1818 
1819         deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1820                                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1821                                            &resultImagePostDispatchBarrier);
1822 
1823         deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1824         deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1825                                               &descriptorSet, 0u, DE_NULL);
1826 
1827         switch (m_imageType)
1828         {
1829         case IMAGE_TYPE_1D_ARRAY:
1830             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(),
1831                                         subresourceRange.layerCount, layerSize.z());
1832             break;
1833         case IMAGE_TYPE_2D_ARRAY:
1834         case IMAGE_TYPE_CUBE:
1835         case IMAGE_TYPE_CUBE_ARRAY:
1836             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1837                                         subresourceRange.layerCount);
1838             break;
1839         default:
1840             deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1841                                         layerSize.z());
1842             break;
1843         }
1844     }
1845 }
1846 
1847 bool BinaryAtomicIntermValuesInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1848 {
1849     const UVec3 gridSize         = getShaderGridSize(m_imageType, m_imageSize);
1850     const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1851 
1852     tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(),
1853                                              outputBufferAllocation.getHostPtr());
1854 
1855     for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1856         for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1857             for (uint32_t x = 0; x < gridSize.x(); x++)
1858             {
1859                 if (isUintFormat(mapTextureFormat(m_format)))
1860                 {
1861                     if (is64Bit)
1862                     {
1863                         if (!areValuesCorrect<uint64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1864                             return false;
1865                     }
1866                     else
1867                     {
1868                         if (!areValuesCorrect<uint32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1869                             return false;
1870                     }
1871                 }
1872                 else if (isIntFormat(mapTextureFormat(m_format)))
1873                 {
1874                     if (is64Bit)
1875                     {
1876                         if (!areValuesCorrect<int64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1877                             return false;
1878                     }
1879                     else
1880                     {
1881                         if (!areValuesCorrect<int32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1882                             return false;
1883                     }
1884                 }
1885                 else
1886                 {
1887                     // 32-bit floating point
1888                     if (!areValuesCorrect<int32_t>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1889                         return false;
1890                 }
1891             }
1892 
1893     return true;
1894 }
1895 
1896 template <typename T>
1897 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer,
1898                                                         const bool isFloatingPoint, int32_t x, int32_t y, int32_t z,
1899                                                         const UVec3 &gridSize, const IVec3 extendedGridSize) const
1900 {
1901     T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1902     T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1903     bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1904 
1905     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1906     {
1907         IVec3 gid(x + i * gridSize.x(), y, z);
1908         T data = *((T *)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
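        // Float formats store a float bit pattern; reinterpret it and convert so the
        // sequence check below operates on the integer representation.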
1909         if (isFloatingPoint)
1910         {
1911             float fData;
1912             deMemcpy(&fData, &data, sizeof(fData));
1913             data = static_cast<T>(fData);
1914         }
1915         resultValues[i] = data;
1916         atomicArgs[i]   = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1917         argsUsed[i]     = false;
1918     }
1919 
1920     // Verify that the return values form a valid sequence.
1921     return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1922 }
1923 
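// Backtracking search: at each step, try every unused atomic argument whose observed
// return value matches the value the pixel would hold at that point. If all
// NUM_INVOCATIONS_PER_PIXEL invocations can be consumed this way, the returned
// intermediate values are consistent with some execution order of the atomics.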
1924 template <typename T>
1925 bool BinaryAtomicIntermValuesInstance::verifyRecursive(const int32_t index, const T valueSoFar,
1926                                                        bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1927                                                        const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1928                                                        const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1929 {
1930     if (index >= static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL))
1931         return true;
1932 
1933     for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1934     {
1935         if (!argsUsed[i] && resultValues[i] == valueSoFar)
1936         {
1937             argsUsed[i] = true;
1938 
1939             if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]),
1940                                 argsUsed, atomicArgs, resultValues))
1941             {
1942                 return true;
1943             }
1944 
1945             argsUsed[i] = false;
1946         }
1947     }
1948 
1949     return false;
1950 }
1951 
1952 TestInstance *BinaryAtomicIntermValuesCase::createInstance(Context &context) const
1953 {
1954     return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling,
1955                                                 m_operation, m_useTransfer, m_readType, m_backingType);
1956 }
1957 
1958 } // namespace
1959 
1960 tcu::TestCaseGroup *createImageAtomicOperationTests(tcu::TestContext &testCtx)
1961 {
1962     de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));
1963 
1964     struct ImageParams
1965     {
1966         ImageParams(const ImageType imageType, const tcu::UVec3 &imageSize)
1967             : m_imageType(imageType)
1968             , m_imageSize(imageSize)
1969         {
1970         }
1971         const ImageType m_imageType;
1972         const tcu::UVec3 m_imageSize;
1973     };
1974 
1975     const ImageParams imageParamsArray[] = {ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1976                                             ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1977                                             ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1978                                             ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1979                                             ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1980                                             ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1981                                             ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1982                                             ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))};
1983 
1984     const tcu::TextureFormat formats[] = {tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1985                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1986                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1987                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1988                                           tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)};
1989 
1990     static const VkImageTiling s_tilings[] = {
1991         VK_IMAGE_TILING_OPTIMAL,
1992         VK_IMAGE_TILING_LINEAR,
1993     };
1994 
1995     const struct
1996     {
1997         ShaderReadType type;
1998         const char *name;
1999     } readTypes[] = {
2000         {ShaderReadType::NORMAL, "normal_read"},
2001 #ifndef CTS_USES_VULKANSC
2002         {ShaderReadType::SPARSE, "sparse_read"},
2003 #endif // CTS_USES_VULKANSC
2004     };
2005 
2006     const struct
2007     {
2008         ImageBackingType type;
2009         const char *name;
2010     } backingTypes[] = {
2011         {ImageBackingType::NORMAL, "normal_img"},
2012 #ifndef CTS_USES_VULKANSC
2013         {ImageBackingType::SPARSE, "sparse_img"},
2014 #endif // CTS_USES_VULKANSC
2015     };
2016 
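    // Build the test hierarchy: operation / image type / (no)transfer / read type /
    // backing type, with one end-result and one intermediate-values case per format and tiling.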
2017     for (uint32_t operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
2018     {
2019         const AtomicOperation operation = (AtomicOperation)operationI;
2020 
2021         de::MovePtr<tcu::TestCaseGroup> operationGroup(
2022             new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));
2023 
2024         for (uint32_t imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2025         {
2026             const ImageType imageType  = imageParamsArray[imageTypeNdx].m_imageType;
2027             const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2028 
2029             de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
2030                 new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
2031 
2032             for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2033             {
2034                 const bool useTransfer = (useTransferIdx > 0);
2035                 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2036 
2037                 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
2038 
2039                 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2040                 {
2041                     const auto &readType = readTypes[readTypeIdx];
2042 
2043                     de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));
2044 
2045                     for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2046                     {
2047                         const auto &backingType = backingTypes[backingTypeIdx];
2048 
2049                         de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(
2050                             new tcu::TestCaseGroup(testCtx, backingType.name));
2051 
2052                         for (uint32_t formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2053                         {
2054                             for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2055                             {
2056                                 const TextureFormat &format  = formats[formatNdx];
2057                                 const std::string formatName = getShaderImageFormatQualifier(format);
2058                                 const char *suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2059 
2060                                 // Texel buffer cases for non-float formats would need dedicated
2060                                 // SPIR-V programs in vktImageAtomicSpirvShaders.cpp.
2061                                 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2062                                 {
2063                                     continue;
2064                                 }
2065 
2066                                 // Only 2D and 3D images may support sparse residency,
2067                                 // and VK_IMAGE_TILING_LINEAR does not support it at all.
2068                                 const auto vkImageType = mapImageType(imageType);
2069                                 if (backingType.type == ImageBackingType::SPARSE &&
2070                                     ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) ||
2071                                      (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2072                                     continue;
2073 
2074                                 // Only some operations are supported on floating-point formats.
2075                                 if (format.type == tcu::TextureFormat::FLOAT)
2076                                 {
2077                                     if (operation != ATOMIC_OPERATION_ADD &&
2078 #ifndef CTS_USES_VULKANSC
2079                                         operation != ATOMIC_OPERATION_MIN && operation != ATOMIC_OPERATION_MAX &&
2080 #endif // CTS_USES_VULKANSC
2081                                         operation != ATOMIC_OPERATION_EXCHANGE)
2082                                     {
2083                                         continue;
2084                                     }
2085                                 }
2086 
2087                                 if (readType.type == ShaderReadType::SPARSE)
2088                                 {
2089                                     // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2090                                     if (useTransfer)
2091                                         continue;
2092 
2093                                     // Sparse reads are not supported for all types of images.
2094                                     if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY ||
2095                                         imageType == IMAGE_TYPE_BUFFER)
2096                                         continue;
2097                                 }
2098 
2099                                 // End-result case: checks the final value produced by the atomic
2099                                 // operations, not the intermediate return values.
2100                                 const string caseEndResult = formatName + "_end_result" + suffix;
2101                                 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(
2102                                     testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx],
2103                                     operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2104 
2105                                 // Intermediate-values case: checks the return values of the atomic
2105                                 // function, not the end result.
2106                                 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2107                                 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(
2108                                     testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx],
2109                                     operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2110                             }
2111                         }
2112 
2113                         readTypeGroup->addChild(backingTypeGroup.release());
2114                     }
2115 
2116                     transferGroup->addChild(readTypeGroup.release());
2117                 }
2118 
2119                 imageTypeGroup->addChild(transferGroup.release());
2120             }
2121 
2122             operationGroup->addChild(imageTypeGroup.release());
2123         }
2124 
2125         imageAtomicOperationsTests->addChild(operationGroup.release());
2126     }
2127 
2128     return imageAtomicOperationsTests.release();
2129 }
2130 
2131 } // namespace image
2132 } // namespace vkt
2133