1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktImageAtomicOperationTests.cpp
21 * \brief Image atomic operation tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktImageAtomicOperationTests.hpp"
25 #include "vktImageAtomicSpirvShaders.hpp"
26
27 #include "deUniquePtr.hpp"
28 #include "deStringUtil.hpp"
29 #include "deSTLUtil.hpp"
30
31 #include "vktTestCaseUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkQueryUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vktImageTestsUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkTypeUtil.hpp"
41 #include "vkCmdUtil.hpp"
42 #include "vkObjUtil.hpp"
43 #include "vkBufferWithMemory.hpp"
44
45 #include "tcuTextureUtil.hpp"
46 #include "tcuTexture.hpp"
47 #include "tcuVectorType.hpp"
48 #include "tcuStringTemplate.hpp"
49
50 namespace vkt
51 {
52 namespace image
53 {
54 namespace
55 {
56
57 using namespace vk;
58 using namespace std;
59 using de::toString;
60
61 using tcu::ConstPixelBufferAccess;
62 using tcu::CubeFace;
63 using tcu::IVec2;
64 using tcu::IVec3;
65 using tcu::IVec4;
66 using tcu::PixelBufferAccess;
67 using tcu::TestContext;
68 using tcu::Texture1D;
69 using tcu::Texture2D;
70 using tcu::Texture2DArray;
71 using tcu::Texture3D;
72 using tcu::TextureCube;
73 using tcu::TextureFormat;
74 using tcu::UVec3;
75 using tcu::UVec4;
76 using tcu::Vec4;
77 using tcu::Vector;
78
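//! Each pixel is written by several shader invocations so that the atomicity of the image operations is actually exercised.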
79 enum
80 {
81 NUM_INVOCATIONS_PER_PIXEL = 5u
82 };
83
84 enum AtomicOperation
85 {
86 ATOMIC_OPERATION_ADD = 0,
87 ATOMIC_OPERATION_SUB,
88 ATOMIC_OPERATION_INC,
89 ATOMIC_OPERATION_DEC,
90 ATOMIC_OPERATION_MIN,
91 ATOMIC_OPERATION_MAX,
92 ATOMIC_OPERATION_AND,
93 ATOMIC_OPERATION_OR,
94 ATOMIC_OPERATION_XOR,
95 ATOMIC_OPERATION_EXCHANGE,
96 ATOMIC_OPERATION_COMPARE_EXCHANGE,
97
98 ATOMIC_OPERATION_LAST
99 };
100
101 enum class ShaderReadType
102 {
103 NORMAL = 0,
104 SPARSE,
105 };
106
107 enum class ImageBackingType
108 {
109 NORMAL = 0,
110 SPARSE,
111 };
112
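//! Builds the GLSL texel coordinate expression for the given image type.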
113 static string getCoordStr(const ImageType imageType, const std::string &x, const std::string &y, const std::string &z)
114 {
115 switch (imageType)
116 {
117 case IMAGE_TYPE_1D:
118 case IMAGE_TYPE_BUFFER:
119 return x;
120 case IMAGE_TYPE_1D_ARRAY:
121 case IMAGE_TYPE_2D:
122 return string("ivec2(" + x + "," + y + ")");
123 case IMAGE_TYPE_2D_ARRAY:
124 case IMAGE_TYPE_3D:
125 case IMAGE_TYPE_CUBE:
126 case IMAGE_TYPE_CUBE_ARRAY:
127 return string("ivec3(" + x + "," + y + "," + z + ")");
128 default:
129 DE_ASSERT(false);
130 return "";
131 }
132 }
133
134 static string getComponentTypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
135 {
136 DE_ASSERT(intFormat || uintFormat || floatFormat);
137
138 const bool is64 = (componentWidth == 64);
139
140 if (intFormat)
141 return (is64 ? "int64_t" : "int");
142 if (uintFormat)
143 return (is64 ? "uint64_t" : "uint");
144 if (floatFormat)
145 return (is64 ? "double" : "float");
146
147 return "";
148 }
149
150 static string getVec4TypeStr(uint32_t componentWidth, bool intFormat, bool uintFormat, bool floatFormat)
151 {
152 DE_ASSERT(intFormat || uintFormat || floatFormat);
153
154 const bool is64 = (componentWidth == 64);
155
156 if (intFormat)
157 return (is64 ? "i64vec4" : "ivec4");
158 if (uintFormat)
159 return (is64 ? "u64vec4" : "uvec4");
160 if (floatFormat)
161 return (is64 ? "f64vec4" : "vec4");
162
163 return "";
164 }
165
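//! Returns the GLSL expression used as the per-invocation operand of the atomic function.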
166 static string getAtomicFuncArgumentShaderStr(const AtomicOperation op, const string &x, const string &y,
167 const string &z, const IVec3 &gridSize)
168 {
169 switch (op)
170 {
171 case ATOMIC_OPERATION_ADD:
172 case ATOMIC_OPERATION_AND:
173 case ATOMIC_OPERATION_OR:
174 case ATOMIC_OPERATION_XOR:
175 return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
176 case ATOMIC_OPERATION_MIN:
177 case ATOMIC_OPERATION_MAX:
178 // multiply by (1 - 2*(value % 2)) to make half of the data negative
179 // this will result in generating large numbers for uint formats
180 return string("((1 - 2*(" + x + " % 2)) * (" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + "))");
181 case ATOMIC_OPERATION_EXCHANGE:
182 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
183 return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y +
184 ")");
185 default:
186 DE_ASSERT(false);
187 return "";
188 }
189 }
190
191 static string getAtomicOperationCaseName(const AtomicOperation op)
192 {
193 switch (op)
194 {
195 case ATOMIC_OPERATION_ADD:
196 return string("add");
197 case ATOMIC_OPERATION_SUB:
198 return string("sub");
199 case ATOMIC_OPERATION_INC:
200 return string("inc");
201 case ATOMIC_OPERATION_DEC:
202 return string("dec");
203 case ATOMIC_OPERATION_MIN:
204 return string("min");
205 case ATOMIC_OPERATION_MAX:
206 return string("max");
207 case ATOMIC_OPERATION_AND:
208 return string("and");
209 case ATOMIC_OPERATION_OR:
210 return string("or");
211 case ATOMIC_OPERATION_XOR:
212 return string("xor");
213 case ATOMIC_OPERATION_EXCHANGE:
214 return string("exchange");
215 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
216 return string("compare_exchange");
217 default:
218 DE_ASSERT(false);
219 return "";
220 }
221 }
222
223 static string getAtomicOperationShaderFuncName(const AtomicOperation op)
224 {
225 switch (op)
226 {
227 case ATOMIC_OPERATION_ADD:
228 return string("imageAtomicAdd");
229 case ATOMIC_OPERATION_MIN:
230 return string("imageAtomicMin");
231 case ATOMIC_OPERATION_MAX:
232 return string("imageAtomicMax");
233 case ATOMIC_OPERATION_AND:
234 return string("imageAtomicAnd");
235 case ATOMIC_OPERATION_OR:
236 return string("imageAtomicOr");
237 case ATOMIC_OPERATION_XOR:
238 return string("imageAtomicXor");
239 case ATOMIC_OPERATION_EXCHANGE:
240 return string("imageAtomicExchange");
241 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
242 return string("imageAtomicCompSwap");
243 default:
244 DE_ASSERT(false);
245 return "";
246 }
247 }
248
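//! Initial value written to every texel before the atomic operations are performed.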
249 template <typename T>
250 T getOperationInitialValue(const AtomicOperation op)
251 {
252 switch (op)
253 {
254 // \note 18 is just an arbitrary small nonzero value.
255 case ATOMIC_OPERATION_ADD:
256 return 18;
257 case ATOMIC_OPERATION_INC:
258 return 18;
259 case ATOMIC_OPERATION_SUB:
260 return (1 << 24) - 1;
261 case ATOMIC_OPERATION_DEC:
262 return (1 << 24) - 1;
263 case ATOMIC_OPERATION_MIN:
264 return (1 << 15) - 1;
265 case ATOMIC_OPERATION_MAX:
266 return 18;
267 case ATOMIC_OPERATION_AND:
268 return (1 << 15) - 1;
269 case ATOMIC_OPERATION_OR:
270 return 18;
271 case ATOMIC_OPERATION_XOR:
272 return 18;
273 case ATOMIC_OPERATION_EXCHANGE:
274 return 18;
275 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
276 return 18;
277 default:
278 DE_ASSERT(false);
279 return 0xFFFFFFFF;
280 }
281 }
282
283 template <>
284 int64_t getOperationInitialValue<int64_t>(const AtomicOperation op)
285 {
286 switch (op)
287 {
288 // \note 0x000000BEFFFFFF18 is just an arbitrary nonzero value.
289 case ATOMIC_OPERATION_ADD:
290 return 0x000000BEFFFFFF18;
291 case ATOMIC_OPERATION_INC:
292 return 0x000000BEFFFFFF18;
293 case ATOMIC_OPERATION_SUB:
294 return (1ull << 56) - 1;
295 case ATOMIC_OPERATION_DEC:
296 return (1ull << 56) - 1;
297 case ATOMIC_OPERATION_MIN:
298 return (1ull << 47) - 1;
299 case ATOMIC_OPERATION_MAX:
300 return 0x000000BEFFFFFF18;
301 case ATOMIC_OPERATION_AND:
302 return (1ull << 47) - 1;
303 case ATOMIC_OPERATION_OR:
304 return 0x000000BEFFFFFF18;
305 case ATOMIC_OPERATION_XOR:
306 return 0x000000BEFFFFFF18;
307 case ATOMIC_OPERATION_EXCHANGE:
308 return 0x000000BEFFFFFF18;
309 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
310 return 0x000000BEFFFFFF18;
311 default:
312 DE_ASSERT(false);
313 return 0xFFFFFFFFFFFFFFFF;
314 }
315 }
316
317 template <>
318 uint64_t getOperationInitialValue<uint64_t>(const AtomicOperation op)
319 {
320 return (uint64_t)getOperationInitialValue<int64_t>(op);
321 }
322
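//! Host-side counterpart of the shader operand expression above.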
323 template <typename T>
324 static T getAtomicFuncArgument(const AtomicOperation op, const IVec3 &invocationID, const IVec3 &gridSize)
325 {
326 const T x = static_cast<T>(invocationID.x());
327 const T y = static_cast<T>(invocationID.y());
328 const T z = static_cast<T>(invocationID.z());
329
330 switch (op)
331 {
332 // \note Fall-throughs.
333 case ATOMIC_OPERATION_ADD:
334 case ATOMIC_OPERATION_SUB:
335 case ATOMIC_OPERATION_AND:
336 case ATOMIC_OPERATION_OR:
337 case ATOMIC_OPERATION_XOR:
338 return x * x + y * y + z * z;
339 case ATOMIC_OPERATION_INC:
340 case ATOMIC_OPERATION_DEC:
341 return 1;
342 case ATOMIC_OPERATION_MIN:
343 case ATOMIC_OPERATION_MAX:
344 // multiply half of the data by -1
345 return (1 - 2 * (x % 2)) * (x * x + y * y + z * z);
346 case ATOMIC_OPERATION_EXCHANGE:
347 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
348 return (z * static_cast<T>(gridSize.x()) + x) * static_cast<T>(gridSize.y()) + y;
349 default:
350 DE_ASSERT(false);
351 return -1;
352 }
353 }
354
355 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried out (i.e. it is both commutative and associative).
356 static bool isOrderIndependentAtomicOperation(const AtomicOperation op)
357 {
358 return op == ATOMIC_OPERATION_ADD || op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC ||
359 op == ATOMIC_OPERATION_DEC || op == ATOMIC_OPERATION_MIN || op == ATOMIC_OPERATION_MAX ||
360 op == ATOMIC_OPERATION_AND || op == ATOMIC_OPERATION_OR || op == ATOMIC_OPERATION_XOR;
361 }
362
363 //! Checks if the operation needs an SPIR-V shader.
364 static bool isSpirvAtomicOperation(const AtomicOperation op)
365 {
366 return op == ATOMIC_OPERATION_SUB || op == ATOMIC_OPERATION_INC || op == ATOMIC_OPERATION_DEC;
367 }
368
369 //! Returns the SPIR-V assembler name of the given operation.
370 static std::string getSpirvAtomicOpName(const AtomicOperation op)
371 {
372 switch (op)
373 {
374 case ATOMIC_OPERATION_SUB:
375 return "OpAtomicISub";
376 case ATOMIC_OPERATION_INC:
377 return "OpAtomicIIncrement";
378 case ATOMIC_OPERATION_DEC:
379 return "OpAtomicIDecrement";
380 default:
381 break;
382 }
383
384 DE_ASSERT(false);
385 return "";
386 }
387
388 //! Returns true if the given SPIR-V operation does not need the last argument, compared to OpAtomicIAdd.
389 static bool isSpirvAtomicNoLastArgOp(const AtomicOperation op)
390 {
391 switch (op)
392 {
393 case ATOMIC_OPERATION_SUB:
394 return false;
395 case ATOMIC_OPERATION_INC: // fallthrough
396 case ATOMIC_OPERATION_DEC:
397 return true;
398 default:
399 break;
400 }
401
402 DE_ASSERT(false);
403 return false;
404 }
405
406 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
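//! For compare-exchange, the compare value passed by the shaders equals the initial texel value, so the first invocation to reach a texel performs the swap.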
407 template <typename T>
408 static T computeBinaryAtomicOperationResult(const AtomicOperation op, const T a, const T b)
409 {
410 switch (op)
411 {
412 case ATOMIC_OPERATION_INC: // fallthrough.
413 case ATOMIC_OPERATION_ADD:
414 return a + b;
415 case ATOMIC_OPERATION_DEC: // fallthrough.
416 case ATOMIC_OPERATION_SUB:
417 return a - b;
418 case ATOMIC_OPERATION_MIN:
419 return de::min(a, b);
420 case ATOMIC_OPERATION_MAX:
421 return de::max(a, b);
422 case ATOMIC_OPERATION_AND:
423 return a & b;
424 case ATOMIC_OPERATION_OR:
425 return a | b;
426 case ATOMIC_OPERATION_XOR:
427 return a ^ b;
428 case ATOMIC_OPERATION_EXCHANGE:
429 return b;
430 case ATOMIC_OPERATION_COMPARE_EXCHANGE:
431 return (a == (sizeof(T) == 8 ? 0xBEFFFFFF18 : 18)) ? b : a;
432 default:
433 DE_ASSERT(false);
434 return -1;
435 }
436 }
437
438 VkImageUsageFlags getUsageFlags(bool useTransfer)
439 {
440 VkImageUsageFlags usageFlags = VK_IMAGE_USAGE_STORAGE_BIT;
441
442 if (useTransfer)
443 usageFlags |= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
444
445 return usageFlags;
446 }
447
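//! Adds the compute shaders used to initialize the image from a buffer and to read it back when the test does not use transfer operations.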
448 void AddFillReadShader(SourceCollections &sourceCollections, const ImageType &imageType,
449 const tcu::TextureFormat &format, const string &componentType, const string &vec4Type)
450 {
451 const string imageInCoord = getCoordStr(imageType, "gx", "gy", "gz");
452 const string shaderImageFormatStr = getShaderImageFormatQualifier(format);
453 const string shaderImageTypeStr = getShaderImageType(format, imageType);
454 const auto componentWidth = getFormatComponentWidth(mapTextureFormat(format), 0u);
455 const string extensions =
456 ((componentWidth == 64u) ? "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
457 "#extension GL_EXT_shader_image_int64 : require\n" :
458 "");
459
460 const string fillShader =
461 "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
462 ";\n"
463 "\n"
464 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
465 "layout (" +
466 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
467 " u_resultImage;\n"
468 "\n"
469 "layout(std430, binding = 1) buffer inputBuffer\n"
470 "{\n"
471 " " +
472 componentType +
473 " data[];\n"
474 "} inBuffer;\n"
475 "\n"
476 "void main(void)\n"
477 "{\n"
478 " int gx = int(gl_GlobalInvocationID.x);\n"
479 " int gy = int(gl_GlobalInvocationID.y);\n"
480 " int gz = int(gl_GlobalInvocationID.z);\n"
481 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
482 " imageStore(u_resultImage, " +
483 imageInCoord + ", " + vec4Type +
484 "(inBuffer.data[index]));\n"
485 "}\n";
486
487 const string readShader =
488 "#version 450\n" + extensions + "precision highp " + shaderImageTypeStr +
489 ";\n"
490 "\n"
491 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
492 "layout (" +
493 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
494 " u_resultImage;\n"
495 "\n"
496 "layout(std430, binding = 1) buffer outputBuffer\n"
497 "{\n"
498 " " +
499 componentType +
500 " data[];\n"
501 "} outBuffer;\n"
502 "\n"
503 "void main(void)\n"
504 "{\n"
505 " int gx = int(gl_GlobalInvocationID.x);\n"
506 " int gy = int(gl_GlobalInvocationID.y);\n"
507 " int gz = int(gl_GlobalInvocationID.z);\n"
508 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
509 " outBuffer.data[index] = imageLoad(u_resultImage, " +
510 imageInCoord +
511 ").x;\n"
512 "}\n";
513
514 if ((imageType != IMAGE_TYPE_1D) && (imageType != IMAGE_TYPE_1D_ARRAY) && (imageType != IMAGE_TYPE_BUFFER))
515 {
516 const string readShaderResidency =
517 "#version 450\n"
518 "#extension GL_ARB_sparse_texture2 : require\n" +
519 extensions + "precision highp " + shaderImageTypeStr +
520 ";\n"
521 "\n"
522 "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
523 "layout (" +
524 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
525 " u_resultImage;\n"
526 "\n"
527 "layout(std430, binding = 1) buffer outputBuffer\n"
528 "{\n"
529 " " +
530 componentType +
531 " data[];\n"
532 "} outBuffer;\n"
533 "\n"
534 "void main(void)\n"
535 "{\n"
536 " int gx = int(gl_GlobalInvocationID.x);\n"
537 " int gy = int(gl_GlobalInvocationID.y);\n"
538 " int gz = int(gl_GlobalInvocationID.z);\n"
539 " uint index = gx + (gy * gl_NumWorkGroups.x) + (gz *gl_NumWorkGroups.x * gl_NumWorkGroups.y);\n"
540 " outBuffer.data[index] = imageLoad(u_resultImage, " +
541 imageInCoord +
542 ").x;\n"
543 " " +
544 vec4Type +
545 " sparseValue;\n"
546 " sparseImageLoadARB(u_resultImage, " +
547 imageInCoord +
548 ", sparseValue);\n"
549 " if (outBuffer.data[index] != sparseValue.x)\n"
550 " outBuffer.data[index] = " +
551 vec4Type +
552 "(1234).x;\n"
553 "}\n";
554
555 sourceCollections.glslSources.add("readShaderResidency")
556 << glu::ComputeSource(readShaderResidency.c_str())
557 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
558 }
559
560 sourceCollections.glslSources.add("fillShader")
561 << glu::ComputeSource(fillShader.c_str())
562 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
563 sourceCollections.glslSources.add("readShader")
564 << glu::ComputeSource(readShader.c_str())
565 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
566 }
567
568 //! Prepare the initial data for the image
569 static void initDataForImage(const VkDevice device, const DeviceInterface &deviceInterface, const TextureFormat &format,
570 const AtomicOperation operation, const tcu::UVec3 &gridSize, BufferWithMemory &buffer)
571 {
572 Allocation &bufferAllocation = buffer.getAllocation();
573 const VkFormat imageFormat = mapTextureFormat(format);
574 tcu::PixelBufferAccess pixelBuffer(format, gridSize.x(), gridSize.y(), gridSize.z(), bufferAllocation.getHostPtr());
575
576 if (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)
577 {
578 const int64_t initialValue(getOperationInitialValue<int64_t>(operation));
579
580 for (uint32_t z = 0; z < gridSize.z(); z++)
581 for (uint32_t y = 0; y < gridSize.y(); y++)
582 for (uint32_t x = 0; x < gridSize.x(); x++)
583 {
584 *((int64_t *)pixelBuffer.getPixelPtr(x, y, z)) = initialValue;
585 }
586 }
587 else
588 {
589 const tcu::IVec4 initialValue(getOperationInitialValue<int32_t>(operation));
590
591 for (uint32_t z = 0; z < gridSize.z(); z++)
592 for (uint32_t y = 0; y < gridSize.y(); y++)
593 for (uint32_t x = 0; x < gridSize.x(); x++)
594 {
595 pixelBuffer.setPixel(initialValue, x, y, z);
596 }
597 }
598
599 flushAlloc(deviceInterface, device, bufferAllocation);
600 }
601
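//! Checks the format, tiling, feature and extension support shared by all atomic operation cases.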
602 void commonCheckSupport(Context &context, const tcu::TextureFormat &tcuFormat, VkImageTiling tiling,
603 ImageType imageType, const tcu::UVec3 &imageSize, AtomicOperation operation, bool useTransfer,
604 ShaderReadType readType, ImageBackingType backingType)
605 {
606 const VkFormat format = mapTextureFormat(tcuFormat);
607 const VkImageType vkImgType = mapImageType(imageType);
608 const VkFormatFeatureFlags texelBufferSupport =
609 (VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT);
610
611 const auto &vki = context.getInstanceInterface();
612 const auto physicalDevice = context.getPhysicalDevice();
613 const auto usageFlags = getUsageFlags(useTransfer);
614
615 VkImageFormatProperties vkImageFormatProperties;
616 const auto result = vki.getPhysicalDeviceImageFormatProperties(physicalDevice, format, vkImgType, tiling,
617 usageFlags, 0, &vkImageFormatProperties);
618 if (result != VK_SUCCESS)
619 {
620 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
621 TCU_THROW(NotSupportedError, "Format unsupported for tiling");
622 else
623 TCU_FAIL("vkGetPhysicalDeviceImageFormatProperties returned unexpected error");
624 }
625
626 if (vkImageFormatProperties.maxArrayLayers < (uint32_t)getNumLayers(imageType, imageSize))
627 {
628 TCU_THROW(NotSupportedError, "This format and tiling combination does not support this number of array layers");
629 }
630
631 const VkFormatProperties formatProperties =
632 getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
633 if ((imageType == IMAGE_TYPE_BUFFER) &&
634 ((formatProperties.bufferFeatures & texelBufferSupport) != texelBufferSupport))
635 TCU_THROW(NotSupportedError, "Atomic storage texel buffers not supported");
636
637 const VkFormatFeatureFlags requiredFeaturesLinear =
638 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
639 if (tiling == vk::VK_IMAGE_TILING_LINEAR &&
640 ((formatProperties.linearTilingFeatures & requiredFeaturesLinear) != requiredFeaturesLinear))
641 {
642 TCU_THROW(NotSupportedError, "Format doesn't support atomic storage with linear tiling");
643 }
644
645 if (imageType == IMAGE_TYPE_CUBE_ARRAY)
646 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_IMAGE_CUBE_ARRAY);
647
648 #ifndef CTS_USES_VULKANSC
649 if (backingType == ImageBackingType::SPARSE)
650 {
651 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
652
653 switch (vkImgType)
654 {
655 case VK_IMAGE_TYPE_2D:
656 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE2D);
657 break;
658 case VK_IMAGE_TYPE_3D:
659 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_IMAGE3D);
660 break;
661 default:
662 DE_ASSERT(false);
663 break;
664 }
665
666 if (!checkSparseImageFormatSupport(context.getPhysicalDevice(), context.getInstanceInterface(), format,
667 vkImgType, VK_SAMPLE_COUNT_1_BIT, usageFlags, tiling))
668 TCU_THROW(NotSupportedError, "Format does not support sparse images");
669 }
670 #endif // CTS_USES_VULKANSC
671
672 if (isFloatFormat(format))
673 {
674 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
675
676 const VkFormatFeatureFlags requiredFeatures =
677 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
678 const auto &atomicFloatFeatures = context.getShaderAtomicFloatFeaturesEXT();
679
680 if (!atomicFloatFeatures.shaderImageFloat32Atomics)
681 TCU_THROW(NotSupportedError, "shaderImageFloat32Atomics not supported");
682
683 if ((operation == ATOMIC_OPERATION_ADD) && !atomicFloatFeatures.shaderImageFloat32AtomicAdd)
684 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicAdd not supported");
685
686 if (operation == ATOMIC_OPERATION_MIN || operation == ATOMIC_OPERATION_MAX)
687 {
688 context.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
689 #ifndef CTS_USES_VULKANSC
690 if (!context.getShaderAtomicFloat2FeaturesEXT().shaderImageFloat32AtomicMinMax)
691 {
692 TCU_THROW(NotSupportedError, "shaderImageFloat32AtomicMinMax not supported");
693 }
694 #endif // CTS_USES_VULKANSC
695 }
696
697 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
698 TCU_FAIL("Required format feature bits not supported");
699
700 if (backingType == ImageBackingType::SPARSE)
701 {
702 if (!atomicFloatFeatures.sparseImageFloat32Atomics)
703 TCU_THROW(NotSupportedError, "sparseImageFloat32Atomics not supported");
704
705 if (operation == ATOMIC_OPERATION_ADD && !atomicFloatFeatures.sparseImageFloat32AtomicAdd)
706 TCU_THROW(NotSupportedError, "sparseImageFloat32AtomicAdd not supported");
707 }
708 }
709 else if (format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
710 {
711 context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");
712
713 const VkFormatFeatureFlags requiredFeatures =
714 (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT);
715 const auto &atomicInt64Features = context.getShaderImageAtomicInt64FeaturesEXT();
716
717 if (!atomicInt64Features.shaderImageInt64Atomics)
718 TCU_THROW(NotSupportedError, "shaderImageInt64Atomics not supported");
719
720 if (backingType == ImageBackingType::SPARSE && !atomicInt64Features.sparseImageInt64Atomics)
721 TCU_THROW(NotSupportedError, "sparseImageInt64Atomics not supported");
722
723 if ((formatProperties.optimalTilingFeatures & requiredFeatures) != requiredFeatures)
724 TCU_FAIL("Mandatory format features not supported");
725 }
726
727 if (useTransfer)
728 {
729 const VkFormatFeatureFlags transferFeatures =
730 (VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT);
731 if ((formatProperties.optimalTilingFeatures & transferFeatures) != transferFeatures)
732 TCU_THROW(NotSupportedError, "Transfer features not supported for this format");
733 }
734
735 if (readType == ShaderReadType::SPARSE)
736 {
737 DE_ASSERT(imageType != IMAGE_TYPE_1D && imageType != IMAGE_TYPE_1D_ARRAY && imageType != IMAGE_TYPE_BUFFER);
738 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_RESOURCE_RESIDENCY);
739 }
740 }
741
742 class BinaryAtomicEndResultCase : public vkt::TestCase
743 {
744 public:
745 BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
746 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format, const VkImageTiling tiling,
747 const AtomicOperation operation, const bool useTransfer,
748 const ShaderReadType shaderReadType, const ImageBackingType backingType,
749 const glu::GLSLVersion glslVersion);
750
751 void initPrograms(SourceCollections &sourceCollections) const;
752 TestInstance *createInstance(Context &context) const;
753 virtual void checkSupport(Context &context) const;
754
755 private:
756 const ImageType m_imageType;
757 const tcu::UVec3 m_imageSize;
758 const tcu::TextureFormat m_format;
759 const VkImageTiling m_tiling;
760 const AtomicOperation m_operation;
761 const bool m_useTransfer;
762 const ShaderReadType m_readType;
763 const ImageBackingType m_backingType;
764 const glu::GLSLVersion m_glslVersion;
765 };
766
767 BinaryAtomicEndResultCase::BinaryAtomicEndResultCase(tcu::TestContext &testCtx, const string &name,
768 const ImageType imageType, const tcu::UVec3 &imageSize,
769 const tcu::TextureFormat &format, const VkImageTiling tiling,
770 const AtomicOperation operation, const bool useTransfer,
771 const ShaderReadType shaderReadType,
772 const ImageBackingType backingType,
773 const glu::GLSLVersion glslVersion)
774 : TestCase(testCtx, name)
775 , m_imageType(imageType)
776 , m_imageSize(imageSize)
777 , m_format(format)
778 , m_tiling(tiling)
779 , m_operation(operation)
780 , m_useTransfer(useTransfer)
781 , m_readType(shaderReadType)
782 , m_backingType(backingType)
783 , m_glslVersion(glslVersion)
784 {
785 }
786
787 void BinaryAtomicEndResultCase::checkSupport(Context &context) const
788 {
789 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
790 m_backingType);
791 }
792
793 void BinaryAtomicEndResultCase::initPrograms(SourceCollections &sourceCollections) const
794 {
795 const VkFormat imageFormat = mapTextureFormat(m_format);
796 const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
797 const bool intFormat = isIntFormat(imageFormat);
798 const bool uintFormat = isUintFormat(imageFormat);
799 const bool floatFormat = isFloatFormat(imageFormat);
800 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
801 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
802
803 if (!m_useTransfer)
804 {
805 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
806 }
807
808 if (isSpirvAtomicOperation(m_operation))
809 {
810 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type, CaseVariant::CHECK_TYPE_END_RESULTS};
811 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
812 std::map<std::string, std::string> specializations;
813
814 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
815 if (isSpirvAtomicNoLastArgOp(m_operation))
816 specializations["LASTARG"] = "";
817
818 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
819 }
820 else
821 {
822 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
823
824 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
825 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
826
827 const string atomicArgExpr =
828 type +
829 getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
830 IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));
831
832 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
833 (componentWidth == 64 ? ", 820338753304" : ", 18") +
834 string(uintFormat ? "u" : "") +
835 string(componentWidth == 64 ? "l" : "") :
836 "";
837 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
838 atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
839 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
840 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
841 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
842 "#extension GL_EXT_shader_atomic_float2 : enable\n"
843 "#extension GL_KHR_memory_scope_semantics : enable";
844
845 string source = versionDecl + "\n" + extensions + "\n";
846
847 if (64 == componentWidth)
848 {
849 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
850 "#extension GL_EXT_shader_image_int64 : require\n";
851 }
852
853 source += "precision highp " + shaderImageTypeStr +
854 ";\n"
855 "\n"
856 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
857 "layout (" +
858 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
859 " u_resultImage;\n"
860 "\n"
861 "void main (void)\n"
862 "{\n"
863 " int gx = int(gl_GlobalInvocationID.x);\n"
864 " int gy = int(gl_GlobalInvocationID.y);\n"
865 " int gz = int(gl_GlobalInvocationID.z);\n"
866 " " +
867 atomicInvocation +
868 ";\n"
869 "}\n";
870
871 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
872 }
873 }
874
875 class BinaryAtomicIntermValuesCase : public vkt::TestCase
876 {
877 public:
878 BinaryAtomicIntermValuesCase(tcu::TestContext &testCtx, const string &name, const ImageType imageType,
879 const tcu::UVec3 &imageSize, const tcu::TextureFormat &format,
880 const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
881 const ShaderReadType shaderReadType, const ImageBackingType backingType,
882 const glu::GLSLVersion glslVersion);
883
884 void initPrograms(SourceCollections &sourceCollections) const;
885 TestInstance *createInstance(Context &context) const;
886 virtual void checkSupport(Context &context) const;
887
888 private:
889 const ImageType m_imageType;
890 const tcu::UVec3 m_imageSize;
891 const tcu::TextureFormat m_format;
892 const VkImageTiling m_tiling;
893 const AtomicOperation m_operation;
894 const bool m_useTransfer;
895 const ShaderReadType m_readType;
896 const ImageBackingType m_backingType;
897 const glu::GLSLVersion m_glslVersion;
898 };
899
900 BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase(
901 TestContext &testCtx, const string &name, const ImageType imageType, const tcu::UVec3 &imageSize,
902 const TextureFormat &format, const VkImageTiling tiling, const AtomicOperation operation, const bool useTransfer,
903 const ShaderReadType shaderReadType, const ImageBackingType backingType, const glu::GLSLVersion glslVersion)
904 : TestCase(testCtx, name)
905 , m_imageType(imageType)
906 , m_imageSize(imageSize)
907 , m_format(format)
908 , m_tiling(tiling)
909 , m_operation(operation)
910 , m_useTransfer(useTransfer)
911 , m_readType(shaderReadType)
912 , m_backingType(backingType)
913 , m_glslVersion(glslVersion)
914 {
915 }
916
917 void BinaryAtomicIntermValuesCase::checkSupport(Context &context) const
918 {
919 commonCheckSupport(context, m_format, m_tiling, m_imageType, m_imageSize, m_operation, m_useTransfer, m_readType,
920 m_backingType);
921 }
922
923 void BinaryAtomicIntermValuesCase::initPrograms(SourceCollections &sourceCollections) const
924 {
925 const VkFormat imageFormat = mapTextureFormat(m_format);
926 const uint32_t componentWidth = getFormatComponentWidth(imageFormat, 0);
927 const bool intFormat = isIntFormat(imageFormat);
928 const bool uintFormat = isUintFormat(imageFormat);
929 const bool floatFormat = isFloatFormat(imageFormat);
930 const string type = getComponentTypeStr(componentWidth, intFormat, uintFormat, floatFormat);
931 const string vec4Type = getVec4TypeStr(componentWidth, intFormat, uintFormat, floatFormat);
932
933 if (!m_useTransfer)
934 {
935 AddFillReadShader(sourceCollections, m_imageType, m_format, type, vec4Type);
936 }
937
938 if (isSpirvAtomicOperation(m_operation))
939 {
940 const CaseVariant caseVariant{m_imageType, m_format.order, m_format.type,
941 CaseVariant::CHECK_TYPE_INTERMEDIATE_RESULTS};
942 const tcu::StringTemplate shaderTemplate{getSpirvAtomicOpShader(caseVariant)};
943 std::map<std::string, std::string> specializations;
944
945 specializations["OPNAME"] = getSpirvAtomicOpName(m_operation);
946 if (isSpirvAtomicNoLastArgOp(m_operation))
947 specializations["LASTARG"] = "";
948
949 sourceCollections.spirvAsmSources.add(m_name) << shaderTemplate.specialize(specializations);
950 }
951 else
952 {
953 const string versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
954 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
955 const string atomicCoord = getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
956 const string invocationCoord = getCoordStr(m_imageType, "gx", "gy", "gz");
957 const string atomicArgExpr =
958 type +
959 getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz",
960 IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z()));
961
962 const string compareExchangeStr = (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE) ?
963 (componentWidth == 64 ? ", 820338753304" : ", 18") +
964 string(uintFormat ? "u" : "") +
965 string(componentWidth == 64 ? "l" : "") :
966 "";
967 const string atomicInvocation = getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " +
968 atomicCoord + compareExchangeStr + ", " + atomicArgExpr + ")";
969 const string shaderImageFormatStr = getShaderImageFormatQualifier(m_format);
970 const string shaderImageTypeStr = getShaderImageType(m_format, m_imageType);
971 const string extensions = "#extension GL_EXT_shader_atomic_float : enable\n"
972 "#extension GL_EXT_shader_atomic_float2 : enable\n"
973 "#extension GL_KHR_memory_scope_semantics : enable";
974
975 string source = versionDecl + "\n" + extensions +
976 "\n"
977 "\n";
978
979 if (64 == componentWidth)
980 {
981 source += "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
982 "#extension GL_EXT_shader_image_int64 : require\n";
983 }
984
985 source += "precision highp " + shaderImageTypeStr +
986 "; \n"
987 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
988 "layout (" +
989 shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr +
990 " u_resultImage;\n"
991 "layout (" +
992 shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr +
993 " u_intermValuesImage;\n"
994 "\n"
995 "void main (void)\n"
996 "{\n"
997 " int gx = int(gl_GlobalInvocationID.x);\n"
998 " int gy = int(gl_GlobalInvocationID.y);\n"
999 " int gz = int(gl_GlobalInvocationID.z);\n"
1000 " imageStore(u_intermValuesImage, " +
1001 invocationCoord + ", " + vec4Type + "(" + atomicInvocation +
1002 "));\n"
1003 "}\n";
1004
1005 sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
1006 }
1007 }
1008
1009 class BinaryAtomicInstanceBase : public vkt::TestInstance
1010 {
1011 public:
1012 BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1013 const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1014 const AtomicOperation operation, const bool useTransfer,
1015 const ShaderReadType shaderReadType, const ImageBackingType backingType);
1016
1017 tcu::TestStatus iterate(void);
1018
1019 virtual uint32_t getOutputBufferSize(void) const = 0;
1020
1021 virtual void prepareResources(const bool useTransfer) = 0;
1022 virtual void prepareDescriptors(const bool isTexelBuffer) = 0;
1023
1024 virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const = 0;
1025 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1026 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1027 const VkDeviceSize &range, const bool useTransfer) = 0;
1028
1029 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const = 0;
1030
1031 protected:
1032 void shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer, const VkPipeline pipeline,
1033 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1034 const VkDeviceSize &range, const tcu::UVec3 &gridSize);
1035
1036 void createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1037 de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr);
1038
1039 void createImageResources(const VkFormat &imageFormat, const bool useTransfer);
1040
1041 const string m_name;
1042 const ImageType m_imageType;
1043 const tcu::UVec3 m_imageSize;
1044 const TextureFormat m_format;
1045 const VkImageTiling m_tiling;
1046 const AtomicOperation m_operation;
1047 const bool m_useTransfer;
1048 const ShaderReadType m_readType;
1049 const ImageBackingType m_backingType;
1050
1051 de::MovePtr<BufferWithMemory> m_inputBuffer;
1052 de::MovePtr<BufferWithMemory> m_outputBuffer;
1053 Move<VkBufferView> m_descResultBufferView;
1054 Move<VkBufferView> m_descIntermResultsBufferView;
1055 Move<VkDescriptorPool> m_descriptorPool;
1056 Move<VkDescriptorSetLayout> m_descriptorSetLayout;
1057 Move<VkDescriptorSet> m_descriptorSet;
1058
1059 Move<VkDescriptorSetLayout> m_descriptorSetLayoutNoTransfer;
1060 Move<VkDescriptorPool> m_descriptorPoolNoTransfer;
1061
1062 de::MovePtr<Image> m_resultImage;
1063 Move<VkImageView> m_resultImageView;
1064
1065 std::vector<VkSemaphore> m_waitSemaphores;
1066 };
1067
1068 BinaryAtomicInstanceBase::BinaryAtomicInstanceBase(Context &context, const string &name, const ImageType imageType,
1069 const tcu::UVec3 &imageSize, const TextureFormat &format,
1070 const VkImageTiling tiling, const AtomicOperation operation,
1071 const bool useTransfer, const ShaderReadType shaderReadType,
1072 const ImageBackingType backingType)
1073 : vkt::TestInstance(context)
1074 , m_name(name)
1075 , m_imageType(imageType)
1076 , m_imageSize(imageSize)
1077 , m_format(format)
1078 , m_tiling(tiling)
1079 , m_operation(operation)
1080 , m_useTransfer(useTransfer)
1081 , m_readType(shaderReadType)
1082 , m_backingType(backingType)
1083 {
1084 }
1085
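//! Creates the resources, runs the atomic compute shader and verifies the output buffer.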
1086 tcu::TestStatus BinaryAtomicInstanceBase::iterate(void)
1087 {
1088 const VkDevice device = m_context.getDevice();
1089 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1090 const VkQueue queue = m_context.getUniversalQueue();
1091 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1092 Allocator &allocator = m_context.getDefaultAllocator();
1093 const VkDeviceSize imageSizeInBytes = tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1094 const VkDeviceSize outBuffSizeInBytes = getOutputBufferSize();
1095 const VkFormat imageFormat = mapTextureFormat(m_format);
1096 const bool isTexelBuffer = (m_imageType == IMAGE_TYPE_BUFFER);
1097
1098 if (!isTexelBuffer)
1099 {
1100 createImageResources(imageFormat, m_useTransfer);
1101 }
1102
1103 tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1104
1105 //Prepare the buffer with the initial data for the image
1106 m_inputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1107 deviceInterface, device, allocator,
1108 makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1109 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1110 static_cast<VkBufferUsageFlagBits>(0u))),
1111 MemoryRequirement::HostVisible));
1112
1113 // Fill in buffer with initial data used for image.
1114 initDataForImage(device, deviceInterface, m_format, m_operation, gridSize, *m_inputBuffer);
1115
1116 // Create a buffer to store shader output copied from result image
1117 m_outputBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1118 deviceInterface, device, allocator,
1119 makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
1120 (isTexelBuffer ? VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT :
1121 static_cast<VkBufferUsageFlagBits>(0u))),
1122 MemoryRequirement::HostVisible));
1123
1124 if (!isTexelBuffer)
1125 {
1126 prepareResources(m_useTransfer);
1127 }
1128
1129 prepareDescriptors(isTexelBuffer);
1130
1131 Move<VkDescriptorSet> descriptorSetFillImage;
1132 Move<VkShaderModule> shaderModuleFillImage;
1133 Move<VkPipelineLayout> pipelineLayoutFillImage;
1134 Move<VkPipeline> pipelineFillImage;
1135
1136 Move<VkDescriptorSet> descriptorSetReadImage;
1137 Move<VkShaderModule> shaderModuleReadImage;
1138 Move<VkPipelineLayout> pipelineLayoutReadImage;
1139 Move<VkPipeline> pipelineReadImage;
1140
1141 if (!m_useTransfer)
1142 {
1143 m_descriptorSetLayoutNoTransfer =
1144 DescriptorSetLayoutBuilder()
1145 .addSingleBinding(
1146 (isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1147 VK_SHADER_STAGE_COMPUTE_BIT)
1148 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1149 .build(deviceInterface, device);
1150
1151 m_descriptorPoolNoTransfer =
1152 DescriptorPoolBuilder()
1153 .addType((isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
1154 2)
1155 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2)
1156 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1157
1158 descriptorSetFillImage =
1159 makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1160
1161 descriptorSetReadImage =
1162 makeDescriptorSet(deviceInterface, device, *m_descriptorPoolNoTransfer, *m_descriptorSetLayoutNoTransfer);
1163
1164 shaderModuleFillImage =
1165 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("fillShader"), 0);
1166 pipelineLayoutFillImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1167 pipelineFillImage =
1168 makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleFillImage);
1169
1170 if (m_readType == ShaderReadType::SPARSE)
1171 {
1172 shaderModuleReadImage = createShaderModule(deviceInterface, device,
1173 m_context.getBinaryCollection().get("readShaderResidency"), 0);
1174 }
1175 else
1176 {
1177 shaderModuleReadImage =
1178 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get("readShader"), 0);
1179 }
1180 pipelineLayoutReadImage = makePipelineLayout(deviceInterface, device, *m_descriptorSetLayoutNoTransfer);
1181 pipelineReadImage =
1182 makeComputePipeline(deviceInterface, device, *pipelineLayoutFillImage, *shaderModuleReadImage);
1183 }
1184
1185 // Create pipeline
1186 const Unique<VkShaderModule> shaderModule(
1187 createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
1188 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
1189 const Unique<VkPipeline> pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));
1190
1191 // Create command buffer
1192 const Unique<VkCommandPool> cmdPool(
1193 createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
1194 const Unique<VkCommandBuffer> cmdBuffer(
1195 allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1196
1197 beginCommandBuffer(deviceInterface, *cmdBuffer);
1198
1199 if (!isTexelBuffer)
1200 {
1201 if (m_useTransfer)
1202 {
1203 const vector<VkBufferImageCopy> bufferImageCopy(
1204 1, makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)),
1205 getNumLayers(m_imageType, m_imageSize)));
1206 copyBufferToImage(deviceInterface, *cmdBuffer, *(*m_inputBuffer), imageSizeInBytes, bufferImageCopy,
1207 VK_IMAGE_ASPECT_COLOR_BIT, 1, getNumLayers(m_imageType, m_imageSize),
1208 m_resultImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
1209 }
1210 else
1211 {
1212 shaderFillImage(*cmdBuffer, *(*m_inputBuffer), *pipelineFillImage, *pipelineLayoutFillImage,
1213 *descriptorSetFillImage, imageSizeInBytes, gridSize);
1214 }
1215 commandsBeforeCompute(*cmdBuffer);
1216 }
1217
1218 deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1219 deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
1220 &m_descriptorSet.get(), 0u, DE_NULL);
1221
1222 deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1223
1224 commandsAfterCompute(*cmdBuffer, *pipelineReadImage, *pipelineLayoutReadImage, *descriptorSetReadImage,
1225 outBuffSizeInBytes, m_useTransfer);
1226
1227 const VkBufferMemoryBarrier outputBufferPreHostReadBarrier = makeBufferMemoryBarrier(
1228 ((m_useTransfer || isTexelBuffer) ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT),
1229 VK_ACCESS_HOST_READ_BIT, m_outputBuffer->get(), 0ull, outBuffSizeInBytes);
1230
1231 deviceInterface.cmdPipelineBarrier(
1232 *cmdBuffer,
1233 ((m_useTransfer || isTexelBuffer) ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),
1234 VK_PIPELINE_STAGE_HOST_BIT, false, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);
1235
1236 endCommandBuffer(deviceInterface, *cmdBuffer);
1237
1238 std::vector<VkPipelineStageFlags> waitStages(m_waitSemaphores.size(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
1239 submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer, false, 1u,
1240 static_cast<uint32_t>(m_waitSemaphores.size()), de::dataOrNull(m_waitSemaphores),
1241 de::dataOrNull(waitStages));
1242
1243 Allocation &outputBufferAllocation = m_outputBuffer->getAllocation();
1244
1245 invalidateAlloc(deviceInterface, device, outputBufferAllocation);
1246
1247 if (verifyResult(outputBufferAllocation, (imageFormat == VK_FORMAT_R64_UINT || imageFormat == VK_FORMAT_R64_SINT)))
1248 return tcu::TestStatus::pass("Comparison succeeded");
1249 else
1250 return tcu::TestStatus::fail("Comparison failed");
1251 }
1252
1253 void BinaryAtomicInstanceBase::shaderFillImage(const VkCommandBuffer cmdBuffer, const VkBuffer &buffer,
1254 const VkPipeline pipeline, const VkPipelineLayout pipelineLayout,
1255 const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1256 const tcu::UVec3 &gridSize)
1257 {
1258 const VkDevice device = m_context.getDevice();
1259 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1260 const VkDescriptorImageInfo descResultImageInfo =
1261 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1262 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(buffer, 0, range);
1263 const VkImageSubresourceRange subresourceRange =
1264 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1265
1266 DescriptorSetUpdateBuilder()
1267 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1268 &descResultImageInfo)
1269 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1270 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1271 .update(deviceInterface, device);
1272
1273 const VkImageMemoryBarrier imageBarrierPre =
1274 makeImageMemoryBarrier(0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1275 m_resultImage->get(), subresourceRange);
1276
1277 deviceInterface.cmdPipelineBarrier(
1278 cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1279 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPre);
1280
1281 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1282 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1283 &descriptorSet, 0u, DE_NULL);
1284
1285 deviceInterface.cmdDispatch(cmdBuffer, gridSize.x(), gridSize.y(), gridSize.z());
1286
1287 const VkImageMemoryBarrier imageBarrierPost =
1288 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1289 VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1290
1291 deviceInterface.cmdPipelineBarrier(
1292 cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0,
1293 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierPost);
1294 }
1295
1296 void BinaryAtomicInstanceBase::createImageAndView(VkFormat imageFormat, const tcu::UVec3 &imageExtent, bool useTransfer,
1297 de::MovePtr<Image> &imagePtr, Move<VkImageView> &imageViewPtr)
1298 {
1299 const VkDevice device = m_context.getDevice();
1300 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1301 Allocator &allocator = m_context.getDefaultAllocator();
1302 const VkImageUsageFlags usageFlags = getUsageFlags(useTransfer);
1303 VkImageCreateFlags createFlags = 0u;
1304
1305 if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
1306 createFlags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
1307
1308 const auto numLayers = getNumLayers(m_imageType, m_imageSize);
1309
1310 VkImageCreateInfo createInfo = {
1311 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1312 DE_NULL, // const void* pNext;
1313 createFlags, // VkImageCreateFlags flags;
1314 mapImageType(m_imageType), // VkImageType imageType;
1315 imageFormat, // VkFormat format;
1316 makeExtent3D(imageExtent), // VkExtent3D extent;
1317 1u, // uint32_t mipLevels;
1318 numLayers, // uint32_t arrayLayers;
1319 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1320 m_tiling, // VkImageTiling tiling;
1321 usageFlags, // VkImageUsageFlags usage;
1322 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1323 0u, // uint32_t queueFamilyIndexCount;
1324 DE_NULL, // const uint32_t* pQueueFamilyIndices;
1325 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1326 };
1327
1328 #ifndef CTS_USES_VULKANSC
1329 if (m_backingType == ImageBackingType::SPARSE)
1330 {
1331 const auto &vki = m_context.getInstanceInterface();
1332 const auto physicalDevice = m_context.getPhysicalDevice();
1333 const auto sparseQueue = m_context.getSparseQueue();
1334 const auto sparseQueueIdx = m_context.getSparseQueueFamilyIndex();
1335 const auto universalQIdx = m_context.getUniversalQueueFamilyIndex();
1336 const uint32_t queueIndices[] = {universalQIdx, sparseQueueIdx};
1337
1338 createInfo.flags |= (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
1339
1340 if (sparseQueueIdx != universalQIdx)
1341 {
1342 createInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
1343 createInfo.queueFamilyIndexCount = static_cast<uint32_t>(DE_LENGTH_OF_ARRAY(queueIndices));
1344 createInfo.pQueueFamilyIndices = queueIndices;
1345 }
1346
1347 const auto sparseImage =
1348 new SparseImage(deviceInterface, device, physicalDevice, vki, createInfo, sparseQueue, allocator, m_format);
1349 m_waitSemaphores.push_back(sparseImage->getSemaphore());
1350 imagePtr = de::MovePtr<Image>(sparseImage);
1351 }
1352 else
1353 #endif // CTS_USES_VULKANSC
1354 imagePtr =
1355 de::MovePtr<Image>(new Image(deviceInterface, device, allocator, createInfo, MemoryRequirement::Any));
1356
1357 const VkImageSubresourceRange subresourceRange =
1358 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, numLayers);
1359
1360 imageViewPtr = makeImageView(deviceInterface, device, imagePtr->get(), mapImageViewType(m_imageType), imageFormat,
1361 subresourceRange);
1362 }
1363
1364 void BinaryAtomicInstanceBase::createImageResources(const VkFormat &imageFormat, const bool useTransfer)
1365 {
1366 //Create the image that is going to store results of atomic operations
1367 createImageAndView(imageFormat, getLayerSize(m_imageType, m_imageSize), useTransfer, m_resultImage,
1368 m_resultImageView);
1369 }
1370
1371 class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
1372 {
1373 public:
1374 BinaryAtomicEndResultInstance(Context &context, const string &name, const ImageType imageType,
1375 const tcu::UVec3 &imageSize, const TextureFormat &format, const VkImageTiling tiling,
1376 const AtomicOperation operation, const bool useTransfer,
1377 const ShaderReadType shaderReadType, const ImageBackingType backingType)
1378 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1379 shaderReadType, backingType)
1380 {
1381 }
1382
1383 virtual uint32_t getOutputBufferSize(void) const;
1384
1385 virtual void prepareResources(const bool useTransfer)
1386 {
1387 DE_UNREF(useTransfer);
1388 }
1389 virtual void prepareDescriptors(const bool isTexelBuffer);
1390
1391 virtual void commandsBeforeCompute(const VkCommandBuffer) const
1392 {
1393 }
1394 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1395 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1396 const VkDeviceSize &range, const bool useTransfer);
1397
1398 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1399
1400 protected:
1401 template <typename T>
1402 bool isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z, const UVec3 &gridSize,
1403 const IVec3 extendedGridSize) const;
1404 };
1405
1406 uint32_t BinaryAtomicEndResultInstance::getOutputBufferSize(void) const
1407 {
1408 return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1409 }
1410
1411 void BinaryAtomicEndResultInstance::prepareDescriptors(const bool isTexelBuffer)
1412 {
1413 const VkDescriptorType descriptorType =
1414 isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1415 const VkDevice device = m_context.getDevice();
1416 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1417
1418 m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1419 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1420 .build(deviceInterface, device);
1421
1422 m_descriptorPool = DescriptorPoolBuilder()
1423 .addType(descriptorType)
1424 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1425
1426 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1427
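// For texel buffers the storage texel buffer view is created over m_inputBuffer, which backs the
// "image" directly; commandsAfterCompute() later reuses that same buffer as the output buffer.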
1428 if (isTexelBuffer)
1429 {
1430 m_descResultBufferView =
1431 makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1432
1433 DescriptorSetUpdateBuilder()
1434 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1435 &(m_descResultBufferView.get()))
1436 .update(deviceInterface, device);
1437 }
1438 else
1439 {
1440 const VkDescriptorImageInfo descResultImageInfo =
1441 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1442
1443 DescriptorSetUpdateBuilder()
1444 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1445 &descResultImageInfo)
1446 .update(deviceInterface, device);
1447 }
1448 }
1449
1450 void BinaryAtomicEndResultInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1451 const VkPipelineLayout pipelineLayout,
1452 const VkDescriptorSet descriptorSet, const VkDeviceSize &range,
1453 const bool useTransfer)
1454 {
1455 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1456 const VkImageSubresourceRange subresourceRange =
1457 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1458 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1459
1460 if (m_imageType == IMAGE_TYPE_BUFFER)
1461 {
1462 m_outputBuffer = m_inputBuffer;
1463 }
1464 else if (useTransfer)
1465 {
1466 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1467 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1468 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_resultImage->get(), subresourceRange);
1469
1470 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1471 VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1472 &resultImagePostDispatchBarrier);
1473
1474 const VkBufferImageCopy bufferImageCopyParams =
1475 makeBufferImageCopy(makeExtent3D(layerSize), getNumLayers(m_imageType, m_imageSize));
1476
1477 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1478 m_outputBuffer->get(), 1u, &bufferImageCopyParams);
1479 }
1480 else
1481 {
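// No transfer support: instead of vkCmdCopyImageToBuffer, bind a second compute pipeline (the
// read shader passed in) that reads the result image and writes its contents into the output buffer.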
1482 const VkDevice device = m_context.getDevice();
1483 const VkDescriptorImageInfo descResultImageInfo =
1484 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1485 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1486
1487 DescriptorSetUpdateBuilder()
1488 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1489 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1490 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1491 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1492 .update(deviceInterface, device);
1493
1494 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1495 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1496 VK_IMAGE_LAYOUT_GENERAL, m_resultImage->get(), subresourceRange);
1497
1498 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1499 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1500 &resultImagePostDispatchBarrier);
1501
1502 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1503 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1504 &descriptorSet, 0u, DE_NULL);
1505
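// For arrayed and cube images the shader grid folds the layers into the last dispatch dimension
// (the second one for 1D arrays), so dispatch with layerCount there instead of the layer size.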
1506 switch (m_imageType)
1507 {
1508 case IMAGE_TYPE_1D_ARRAY:
1509 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), subresourceRange.layerCount, layerSize.z());
1510 break;
1511 case IMAGE_TYPE_2D_ARRAY:
1512 case IMAGE_TYPE_CUBE:
1513 case IMAGE_TYPE_CUBE_ARRAY:
1514 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), subresourceRange.layerCount);
1515 break;
1516 default:
1517 deviceInterface.cmdDispatch(cmdBuffer, layerSize.x(), layerSize.y(), layerSize.z());
1518 break;
1519 }
1520 }
1521 }
1522
1523 bool BinaryAtomicEndResultInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1524 {
1525 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1526 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
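// Each pixel (x, y, z) is processed by NUM_INVOCATIONS_PER_PIXEL invocations located at
// (x + i * gridSize.x(), y, z) in the extended grid; their arguments feed the reference value.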
1527
1528 tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(),
1529 outputBufferAllocation.getHostPtr());
1530
1531 for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1532 for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1533 for (int32_t x = 0; x < resultBuffer.getWidth(); x++)
1534 {
1535 const void *resultValue = resultBuffer.getPixelPtr(x, y, z);
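// For float formats the result is converted to int32 and verified through the integer path;
// this assumes the arguments generated for float atomics are integer-valued (see the cast below).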
1536 int32_t floatToIntValue = 0;
1537 bool isFloatValue = false;
1538 if (isFloatFormat(mapTextureFormat(m_format)))
1539 {
1540 isFloatValue = true;
1541 floatToIntValue = static_cast<int32_t>(*((float *)resultValue));
1542 }
1543
1544 if (isOrderIndependentAtomicOperation(m_operation))
1545 {
1546 if (isUintFormat(mapTextureFormat(m_format)))
1547 {
1548 if (is64Bit)
1549 {
1550 if (!isValueCorrect<uint64_t>(*((uint64_t *)resultValue), x, y, z, gridSize,
1551 extendedGridSize))
1552 return false;
1553 }
1554 else
1555 {
1556 if (!isValueCorrect<uint32_t>(*((uint32_t *)resultValue), x, y, z, gridSize,
1557 extendedGridSize))
1558 return false;
1559 }
1560 }
1561 else if (isIntFormat(mapTextureFormat(m_format)))
1562 {
1563 if (is64Bit)
1564 {
1565 if (!isValueCorrect<int64_t>(*((int64_t *)resultValue), x, y, z, gridSize,
1566 extendedGridSize))
1567 return false;
1568 }
1569 else
1570 {
1571 if (!isValueCorrect<int32_t>(*((int32_t *)resultValue), x, y, z, gridSize,
1572 extendedGridSize))
1573 return false;
1574 }
1575 }
1576 else
1577 {
1578 // 32-bit floating point
1579 if (!isValueCorrect<int32_t>(floatToIntValue, x, y, z, gridSize, extendedGridSize))
1580 return false;
1581 }
1582 }
1583 else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
1584 {
1585 // Check if the end result equals one of the atomic args.
1586 bool matchFound = false;
1587
1588 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1589 {
1590 const IVec3 gid(x + i * gridSize.x(), y, z);
1591 matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1592 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1593 isFloatValue ?
1594 floatToIntValue ==
1595 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1596 (*((int32_t *)resultValue) ==
1597 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1598 }
1599
1600 if (!matchFound)
1601 return false;
1602 }
1603 else if (m_operation == ATOMIC_OPERATION_COMPARE_EXCHANGE)
1604 {
1605 // Check if the end result equals one of the atomic args.
1606 bool matchFound = false;
1607
1608 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
1609 {
1610 const IVec3 gid(x + i * gridSize.x(), y, z);
1611 matchFound = is64Bit ? (*((int64_t *)resultValue) ==
1612 getAtomicFuncArgument<int64_t>(m_operation, gid, extendedGridSize)) :
1613 isFloatValue ?
1614 floatToIntValue ==
1615 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize) :
1616 (*((int32_t *)resultValue) ==
1617 getAtomicFuncArgument<int32_t>(m_operation, gid, extendedGridSize));
1618 }
1619
1620 if (!matchFound)
1621 return false;
1622 }
1623 else
1624 DE_ASSERT(false);
1625 }
1626 return true;
1627 }
1628
1629 template <typename T>
1630 bool BinaryAtomicEndResultInstance::isValueCorrect(const T resultValue, int32_t x, int32_t y, int32_t z,
1631 const UVec3 &gridSize, const IVec3 extendedGridSize) const
1632 {
1633 T reference = getOperationInitialValue<T>(m_operation);
1634 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1635 {
1636 const IVec3 gid(x + i * gridSize.x(), y, z);
1637 T arg = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1638 reference = computeBinaryAtomicOperationResult(m_operation, reference, arg);
1639 }
1640 return (resultValue == reference);
1641 }
1642
1643 TestInstance *BinaryAtomicEndResultCase::createInstance(Context &context) const
1644 {
1645 return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling, m_operation,
1646 m_useTransfer, m_readType, m_backingType);
1647 }
1648
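// "Intermediate values" variant: besides performing the atomic operation, every invocation stores
// the value returned by the atomic call into a second, widened image. Verification then checks that
// the recorded return values can be ordered into some valid serialization of the atomic operations.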
1649 class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
1650 {
1651 public:
1652 BinaryAtomicIntermValuesInstance(Context &context, const string &name, const ImageType imageType,
1653 const tcu::UVec3 &imageSize, const TextureFormat &format,
1654 const VkImageTiling tiling, const AtomicOperation operation,
1655 const bool useTransfer, const ShaderReadType shaderReadType,
1656 const ImageBackingType backingType)
1657 : BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, tiling, operation, useTransfer,
1658 shaderReadType, backingType)
1659 {
1660 }
1661
1662 virtual uint32_t getOutputBufferSize(void) const;
1663
1664 virtual void prepareResources(const bool useTransfer);
1665 virtual void prepareDescriptors(const bool isTexelBuffer);
1666
1667 virtual void commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const;
1668 virtual void commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1669 const VkPipelineLayout pipelineLayout, const VkDescriptorSet descriptorSet,
1670 const VkDeviceSize &range, const bool useTransfer);
1671
1672 virtual bool verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const;
1673
1674 protected:
1675 template <typename T>
1676 bool areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer, const bool isFloatingPoint, int32_t x, int32_t y,
1677 int32_t z, const UVec3 &gridSize, const IVec3 extendedGridSize) const;
1678
1679 template <typename T>
1680 bool verifyRecursive(const int32_t index, const T valueSoFar, bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1681 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1682 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
1683 de::MovePtr<Image> m_intermResultsImage;
1684 Move<VkImageView> m_intermResultsImageView;
1685 };
1686
1687 uint32_t BinaryAtomicIntermValuesInstance::getOutputBufferSize(void) const
1688 {
1689 return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
1690 }
1691
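// The intermediate image is NUM_INVOCATIONS_PER_PIXEL times wider than the result image so that
// each invocation has its own texel; for cube(-array) images the height is scaled as well because
// cube faces must remain square.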
1692 void BinaryAtomicIntermValuesInstance::prepareResources(const bool useTransfer)
1693 {
1694 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1695 const bool isCubeBasedImage = (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
1696 const UVec3 extendedLayerSize =
1697 isCubeBasedImage ?
1698 UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z()) :
1699 UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1700
1701 createImageAndView(mapTextureFormat(m_format), extendedLayerSize, useTransfer, m_intermResultsImage,
1702 m_intermResultsImageView);
1703 }
1704
1705 void BinaryAtomicIntermValuesInstance::prepareDescriptors(const bool isTexelBuffer)
1706 {
1707 const VkDescriptorType descriptorType =
1708 isTexelBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
1709
1710 const VkDevice device = m_context.getDevice();
1711 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1712
1713 m_descriptorSetLayout = DescriptorSetLayoutBuilder()
1714 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1715 .addSingleBinding(descriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1716 .build(deviceInterface, device);
1717
1718 m_descriptorPool = DescriptorPoolBuilder()
1719 .addType(descriptorType, 2u)
1720 .build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1721
1722 m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
1723
1724 if (isTexelBuffer)
1725 {
1726 m_descResultBufferView =
1727 makeBufferView(deviceInterface, device, *(*m_inputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1728 m_descIntermResultsBufferView =
1729 makeBufferView(deviceInterface, device, *(*m_outputBuffer), mapTextureFormat(m_format), 0, VK_WHOLE_SIZE);
1730
1731 DescriptorSetUpdateBuilder()
1732 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1733 &(m_descResultBufferView.get()))
1734 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1735 &(m_descIntermResultsBufferView.get()))
1736 .update(deviceInterface, device);
1737 }
1738 else
1739 {
1740 const VkDescriptorImageInfo descResultImageInfo =
1741 makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
1742 const VkDescriptorImageInfo descIntermResultsImageInfo =
1743 makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1744
1745 DescriptorSetUpdateBuilder()
1746 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), descriptorType,
1747 &descResultImageInfo)
1748 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), descriptorType,
1749 &descIntermResultsImageInfo)
1750 .update(deviceInterface, device);
1751 }
1752 }
1753
1754 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute(const VkCommandBuffer cmdBuffer) const
1755 {
1756 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1757 const VkImageSubresourceRange subresourceRange =
1758 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1759
1760 const VkImageMemoryBarrier imagePreDispatchBarrier =
1761 makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
1762 m_intermResultsImage->get(), subresourceRange);
1763
1764 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1765 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1766 &imagePreDispatchBarrier);
1767 }
1768
1769 void BinaryAtomicIntermValuesInstance::commandsAfterCompute(const VkCommandBuffer cmdBuffer, const VkPipeline pipeline,
1770 const VkPipelineLayout pipelineLayout,
1771 const VkDescriptorSet descriptorSet,
1772 const VkDeviceSize &range, const bool useTransfer)
1773 {
1774 // Nothing to do for texel buffers: intermediate values are written straight into the output buffer through the texel buffer view bound in prepareDescriptors().
1775 if (m_imageType == IMAGE_TYPE_BUFFER)
1776 return;
1777
1778 const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
1779 const VkImageSubresourceRange subresourceRange =
1780 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
1781 const UVec3 layerSize = getLayerSize(m_imageType, m_imageSize);
1782
1783 if (useTransfer)
1784 {
1785 const VkImageMemoryBarrier imagePostDispatchBarrier =
1786 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1787 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_intermResultsImage->get(), subresourceRange);
1788
1789 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1790 VK_PIPELINE_STAGE_TRANSFER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1791 &imagePostDispatchBarrier);
1792
1793 const UVec3 extendedLayerSize = UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
1794 const VkBufferImageCopy bufferImageCopyParams =
1795 makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
1796
1797 deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(),
1798 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u,
1799 &bufferImageCopyParams);
1800 }
1801 else
1802 {
1803 const VkDevice device = m_context.getDevice();
1804 const VkDescriptorImageInfo descResultImageInfo =
1805 makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
1806 const VkDescriptorBufferInfo descResultBufferInfo = makeDescriptorBufferInfo(m_outputBuffer->get(), 0, range);
1807
1808 DescriptorSetUpdateBuilder()
1809 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1810 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
1811 .writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1812 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descResultBufferInfo)
1813 .update(deviceInterface, device);
1814
1815 const VkImageMemoryBarrier resultImagePostDispatchBarrier =
1816 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
1817 VK_IMAGE_LAYOUT_GENERAL, m_intermResultsImage->get(), subresourceRange);
1818
1819 deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1820 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, false, 0u, DE_NULL, 0u, DE_NULL, 1u,
1821 &resultImagePostDispatchBarrier);
1822
1823 deviceInterface.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1824 deviceInterface.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
1825 &descriptorSet, 0u, DE_NULL);
1826
1827 switch (m_imageType)
1828 {
1829 case IMAGE_TYPE_1D_ARRAY:
1830 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(),
1831 subresourceRange.layerCount, layerSize.z());
1832 break;
1833 case IMAGE_TYPE_2D_ARRAY:
1834 case IMAGE_TYPE_CUBE:
1835 case IMAGE_TYPE_CUBE_ARRAY:
1836 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1837 subresourceRange.layerCount);
1838 break;
1839 default:
1840 deviceInterface.cmdDispatch(cmdBuffer, NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(),
1841 layerSize.z());
1842 break;
1843 }
1844 }
1845 }
1846
1847 bool BinaryAtomicIntermValuesInstance::verifyResult(Allocation &outputBufferAllocation, const bool is64Bit) const
1848 {
1849 const UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
1850 const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL * gridSize.x(), gridSize.y(), gridSize.z());
1851
1852 tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(),
1853 outputBufferAllocation.getHostPtr());
1854
1855 for (int32_t z = 0; z < resultBuffer.getDepth(); z++)
1856 for (int32_t y = 0; y < resultBuffer.getHeight(); y++)
1857 for (uint32_t x = 0; x < gridSize.x(); x++)
1858 {
1859 if (isUintFormat(mapTextureFormat(m_format)))
1860 {
1861 if (is64Bit)
1862 {
1863 if (!areValuesCorrect<uint64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1864 return false;
1865 }
1866 else
1867 {
1868 if (!areValuesCorrect<uint32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1869 return false;
1870 }
1871 }
1872 else if (isIntFormat(mapTextureFormat(m_format)))
1873 {
1874 if (is64Bit)
1875 {
1876 if (!areValuesCorrect<int64_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1877 return false;
1878 }
1879 else
1880 {
1881 if (!areValuesCorrect<int32_t>(resultBuffer, false, x, y, z, gridSize, extendedGridSize))
1882 return false;
1883 }
1884 }
1885 else
1886 {
1887 // 32-bit floating point
1888 if (!areValuesCorrect<int32_t>(resultBuffer, true, x, y, z, gridSize, extendedGridSize))
1889 return false;
1890 }
1891 }
1892
1893 return true;
1894 }
1895
1896 template <typename T>
1897 bool BinaryAtomicIntermValuesInstance::areValuesCorrect(tcu::ConstPixelBufferAccess &resultBuffer,
1898 const bool isFloatingPoint, int32_t x, int32_t y, int32_t z,
1899 const UVec3 &gridSize, const IVec3 extendedGridSize) const
1900 {
1901 T resultValues[NUM_INVOCATIONS_PER_PIXEL];
1902 T atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
1903 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL];
1904
1905 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1906 {
1907 IVec3 gid(x + i * gridSize.x(), y, z);
1908 T data = *((T *)resultBuffer.getPixelPtr(gid.x(), gid.y(), gid.z()));
1909 if (isFloatingPoint)
1910 {
1911 float fData;
1912 deMemcpy(&fData, &data, sizeof(fData));
1913 data = static_cast<T>(fData);
1914 }
1915 resultValues[i] = data;
1916 atomicArgs[i] = getAtomicFuncArgument<T>(m_operation, gid, extendedGridSize);
1917 argsUsed[i] = false;
1918 }
1919
1920 // Verify that the return values form a valid sequence.
1921 return verifyRecursive(0, getOperationInitialValue<T>(m_operation), argsUsed, atomicArgs, resultValues);
1922 }
1923
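// Backtracking search over all orderings of the per-pixel invocations: pick any unused invocation
// whose recorded return value equals the value accumulated so far, apply its argument, and recurse.
// E.g. for add with arguments a, b, c executed in the order b, a, c and initial value V0, the
// recorded returns must be V0, V0 + b and V0 + a + b. Worst case explores
// NUM_INVOCATIONS_PER_PIXEL! orderings (5! = 120), which is trivial.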
1924 template <typename T>
1925 bool BinaryAtomicIntermValuesInstance::verifyRecursive(const int32_t index, const T valueSoFar,
1926 bool argsUsed[NUM_INVOCATIONS_PER_PIXEL],
1927 const T atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
1928 const T resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
1929 {
1930 if (index >= static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL))
1931 return true;
1932
1933 for (int32_t i = 0; i < static_cast<int32_t>(NUM_INVOCATIONS_PER_PIXEL); i++)
1934 {
1935 if (!argsUsed[i] && resultValues[i] == valueSoFar)
1936 {
1937 argsUsed[i] = true;
1938
1939 if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]),
1940 argsUsed, atomicArgs, resultValues))
1941 {
1942 return true;
1943 }
1944
1945 argsUsed[i] = false;
1946 }
1947 }
1948
1949 return false;
1950 }
1951
1952 TestInstance *BinaryAtomicIntermValuesCase::createInstance(Context &context) const
1953 {
1954 return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_tiling,
1955 m_operation, m_useTransfer, m_readType, m_backingType);
1956 }
1957
1958 } // namespace
1959
1960 tcu::TestCaseGroup *createImageAtomicOperationTests(tcu::TestContext &testCtx)
1961 {
1962 de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations"));
1963
1964 struct ImageParams
1965 {
1966 ImageParams(const ImageType imageType, const tcu::UVec3 &imageSize)
1967 : m_imageType(imageType)
1968 , m_imageSize(imageSize)
1969 {
1970 }
1971 const ImageType m_imageType;
1972 const tcu::UVec3 m_imageSize;
1973 };
1974
1975 const ImageParams imageParamsArray[] = {ImageParams(IMAGE_TYPE_1D, tcu::UVec3(64u, 1u, 1u)),
1976 ImageParams(IMAGE_TYPE_1D_ARRAY, tcu::UVec3(64u, 1u, 8u)),
1977 ImageParams(IMAGE_TYPE_2D, tcu::UVec3(64u, 64u, 1u)),
1978 ImageParams(IMAGE_TYPE_2D_ARRAY, tcu::UVec3(64u, 64u, 8u)),
1979 ImageParams(IMAGE_TYPE_3D, tcu::UVec3(48u, 48u, 8u)),
1980 ImageParams(IMAGE_TYPE_CUBE, tcu::UVec3(64u, 64u, 1u)),
1981 ImageParams(IMAGE_TYPE_CUBE_ARRAY, tcu::UVec3(64u, 64u, 2u)),
1982 ImageParams(IMAGE_TYPE_BUFFER, tcu::UVec3(64u, 1u, 1u))};
1983
1984 const tcu::TextureFormat formats[] = {tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
1985 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32),
1986 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::FLOAT),
1987 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT64),
1988 tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT64)};
1989
1990 static const VkImageTiling s_tilings[] = {
1991 VK_IMAGE_TILING_OPTIMAL,
1992 VK_IMAGE_TILING_LINEAR,
1993 };
1994
1995 const struct
1996 {
1997 ShaderReadType type;
1998 const char *name;
1999 } readTypes[] = {
2000 {ShaderReadType::NORMAL, "normal_read"},
2001 #ifndef CTS_USES_VULKANSC
2002 {ShaderReadType::SPARSE, "sparse_read"},
2003 #endif // CTS_USES_VULKANSC
2004 };
2005
2006 const struct
2007 {
2008 ImageBackingType type;
2009 const char *name;
2010 } backingTypes[] = {
2011 {ImageBackingType::NORMAL, "normal_img"},
2012 #ifndef CTS_USES_VULKANSC
2013 {ImageBackingType::SPARSE, "sparse_img"},
2014 #endif // CTS_USES_VULKANSC
2015 };
2016
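// The generated hierarchy is <operation>/<image_type>/(no)transfer/<read_type>/<backing_type>, with
// leaf cases named <format>_end_result[_linear] and <format>_intermediate_values[_linear]; assuming
// the usual dEQP-VK.image prefix, a case path looks something like
// dEQP-VK.image.atomic_operations.add.2d.notransfer.normal_read.normal_img.r32ui_end_result.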
2017 for (uint32_t operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
2018 {
2019 const AtomicOperation operation = (AtomicOperation)operationI;
2020
2021 de::MovePtr<tcu::TestCaseGroup> operationGroup(
2022 new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str()));
2023
2024 for (uint32_t imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
2025 {
2026 const ImageType imageType = imageParamsArray[imageTypeNdx].m_imageType;
2027 const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
2028
2029 de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(
2030 new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));
2031
2032 for (int useTransferIdx = 0; useTransferIdx < 2; ++useTransferIdx)
2033 {
2034 const bool useTransfer = (useTransferIdx > 0);
2035 const string groupName = (!useTransfer ? "no" : "") + string("transfer");
2036
2037 de::MovePtr<tcu::TestCaseGroup> transferGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
2038
2039 for (int readTypeIdx = 0; readTypeIdx < DE_LENGTH_OF_ARRAY(readTypes); ++readTypeIdx)
2040 {
2041 const auto &readType = readTypes[readTypeIdx];
2042
2043 de::MovePtr<tcu::TestCaseGroup> readTypeGroup(new tcu::TestCaseGroup(testCtx, readType.name));
2044
2045 for (int backingTypeIdx = 0; backingTypeIdx < DE_LENGTH_OF_ARRAY(backingTypes); ++backingTypeIdx)
2046 {
2047 const auto &backingType = backingTypes[backingTypeIdx];
2048
2049 de::MovePtr<tcu::TestCaseGroup> backingTypeGroup(
2050 new tcu::TestCaseGroup(testCtx, backingType.name));
2051
2052 for (uint32_t formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
2053 {
2054 for (int tilingNdx = 0; tilingNdx < DE_LENGTH_OF_ARRAY(s_tilings); tilingNdx++)
2055 {
2056 const TextureFormat &format = formats[formatNdx];
2057 const std::string formatName = getShaderImageFormatQualifier(format);
2058 const char *suffix = (s_tilings[tilingNdx] == VK_IMAGE_TILING_OPTIMAL) ? "" : "_linear";
2059
2060 // Buffer images with non-float formats would need additional SPIR-V programs in vktImageAtomicSpirvShaders.cpp, so skip them.
2061 if (imageType == IMAGE_TYPE_BUFFER && (format.type != tcu::TextureFormat::FLOAT))
2062 {
2063 continue;
2064 }
2065
2066 // Only 2D and 3D images may support sparse residency.
2067 // VK_IMAGE_TILING_LINEAR does not support sparse residency.
2068 const auto vkImageType = mapImageType(imageType);
2069 if (backingType.type == ImageBackingType::SPARSE &&
2070 ((vkImageType != VK_IMAGE_TYPE_2D && vkImageType != VK_IMAGE_TYPE_3D) ||
2071 (s_tilings[tilingNdx] == VK_IMAGE_TILING_LINEAR)))
2072 continue;
2073
2074 // Only some atomic operations are supported on floating-point formats.
2075 if (format.type == tcu::TextureFormat::FLOAT)
2076 {
2077 if (operation != ATOMIC_OPERATION_ADD &&
2078 #ifndef CTS_USES_VULKANSC
2079 operation != ATOMIC_OPERATION_MIN && operation != ATOMIC_OPERATION_MAX &&
2080 #endif // CTS_USES_VULKANSC
2081 operation != ATOMIC_OPERATION_EXCHANGE)
2082 {
2083 continue;
2084 }
2085 }
2086
2087 if (readType.type == ShaderReadType::SPARSE)
2088 {
2089 // When using transfer, shader reads will not be used, so avoid creating two identical cases.
2090 if (useTransfer)
2091 continue;
2092
2093 // Sparse reads are not supported for all types of images.
2094 if (imageType == IMAGE_TYPE_1D || imageType == IMAGE_TYPE_1D_ARRAY ||
2095 imageType == IMAGE_TYPE_BUFFER)
2096 continue;
2097 }
2098
2099 //!< Atomic case checks the end result of the operations, and not the intermediate return values
2100 const string caseEndResult = formatName + "_end_result" + suffix;
2101 backingTypeGroup->addChild(new BinaryAtomicEndResultCase(
2102 testCtx, caseEndResult, imageType, imageSize, format, s_tilings[tilingNdx],
2103 operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2104
2105 //!< Atomic case checks the return values of the atomic function and not the end result.
2106 const string caseIntermValues = formatName + "_intermediate_values" + suffix;
2107 backingTypeGroup->addChild(new BinaryAtomicIntermValuesCase(
2108 testCtx, caseIntermValues, imageType, imageSize, format, s_tilings[tilingNdx],
2109 operation, useTransfer, readType.type, backingType.type, glu::GLSL_VERSION_450));
2110 }
2111 }
2112
2113 readTypeGroup->addChild(backingTypeGroup.release());
2114 }
2115
2116 transferGroup->addChild(readTypeGroup.release());
2117 }
2118
2119 imageTypeGroup->addChild(transferGroup.release());
2120 }
2121
2122 operationGroup->addChild(imageTypeGroup.release());
2123 }
2124
2125 imageAtomicOperationsTests->addChild(operationGroup.release());
2126 }
2127
2128 return imageAtomicOperationsTests.release();
2129 }
2130
2131 } // namespace image
2132 } // namespace vkt
2133