xref: /aosp_15_r20/external/deqp/external/vulkancts/framework/vulkan/vkRayTracingUtil.hpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 #ifndef _VKRAYTRACINGUTIL_HPP
2 #define _VKRAYTRACINGUTIL_HPP
3 /*-------------------------------------------------------------------------
4  * Vulkan CTS Framework
5  * --------------------
6  *
7  * Copyright (c) 2020 The Khronos Group Inc.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ray tracing utility.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vkDefs.hpp"
27 #include "vkRef.hpp"
28 #include "vkMemUtil.hpp"
29 #include "vkBufferWithMemory.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38 
39 #include "deFloat16.h"
40 
41 #include "tcuVector.hpp"
42 #include "tcuVectorType.hpp"
43 #include "tcuTexture.hpp"
44 #include "qpWatchDog.h"
45 
46 #include <vector>
47 #include <map>
48 #include <limits>
49 #include <stdexcept>
50 
51 namespace vk
52 {
53 
54 #ifndef CTS_USES_VULKANSC
55 
56 constexpr VkShaderStageFlags SHADER_STAGE_ALL_RAY_TRACING =
57     VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
58     VK_SHADER_STAGE_MISS_BIT_KHR | VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
59 
60 const VkTransformMatrixKHR identityMatrix3x4 = {
61     {{1.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 1.0f, 0.0f}}};
62 
63 template <typename T>
makeVkSharedPtr(Move<T> move)64 inline de::SharedPtr<Move<T>> makeVkSharedPtr(Move<T> move)
65 {
66     return de::SharedPtr<Move<T>>(new Move<T>(move));
67 }
68 
69 template <typename T>
makeVkSharedPtr(de::MovePtr<T> movePtr)70 inline de::SharedPtr<de::MovePtr<T>> makeVkSharedPtr(de::MovePtr<T> movePtr)
71 {
72     return de::SharedPtr<de::MovePtr<T>>(new de::MovePtr<T>(movePtr));
73 }
74 
// Returns a copy of the given GLSL source; the text is passed through without any modification.
inline std::string updateRayTracingGLSL(const std::string &str)
{
    std::string glslSource(str);
    return glslSource;
}
79 
80 std::string getCommonRayGenerationShader(void);
81 
82 // Get lowercase version of the format name with no VK_FORMAT_ prefix.
83 std::string getFormatSimpleName(vk::VkFormat format);
84 
85 // Test whether given poin p belons to the triangle (p0, p1, p2)
86 bool pointInTriangle2D(const tcu::Vec3 &p, const tcu::Vec3 &p0, const tcu::Vec3 &p1, const tcu::Vec3 &p2);
87 
88 // Checks the given vertex buffer format is valid for acceleration structures.
89 // Note: VK_KHR_get_physical_device_properties2 and VK_KHR_acceleration_structure are supposed to be supported.
90 void checkAccelerationStructureVertexBufferFormat(const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice,
91                                                   vk::VkFormat format);
92 
93 class RaytracedGeometryBase
94 {
95 public:
96     RaytracedGeometryBase()                                      = delete;
97     RaytracedGeometryBase(const RaytracedGeometryBase &geometry) = delete;
98     RaytracedGeometryBase(VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType);
99     virtual ~RaytracedGeometryBase();
100 
getGeometryType(void) const101     inline VkGeometryTypeKHR getGeometryType(void) const
102     {
103         return m_geometryType;
104     }
isTrianglesType(void) const105     inline bool isTrianglesType(void) const
106     {
107         return m_geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR;
108     }
getVertexFormat(void) const109     inline VkFormat getVertexFormat(void) const
110     {
111         return m_vertexFormat;
112     }
getIndexType(void) const113     inline VkIndexType getIndexType(void) const
114     {
115         return m_indexType;
116     }
usesIndices(void) const117     inline bool usesIndices(void) const
118     {
119         return m_indexType != VK_INDEX_TYPE_NONE_KHR;
120     }
getGeometryFlags(void) const121     inline VkGeometryFlagsKHR getGeometryFlags(void) const
122     {
123         return m_geometryFlags;
124     }
setGeometryFlags(const VkGeometryFlagsKHR geometryFlags)125     inline void setGeometryFlags(const VkGeometryFlagsKHR geometryFlags)
126     {
127         m_geometryFlags = geometryFlags;
128     }
getOpacityMicromap(void)129     inline VkAccelerationStructureTrianglesOpacityMicromapEXT &getOpacityMicromap(void)
130     {
131         return m_opacityGeometryMicromap;
132     }
getHasOpacityMicromap(void) const133     inline bool getHasOpacityMicromap(void) const
134     {
135         return m_hasOpacityMicromap;
136     }
setOpacityMicromap(const VkAccelerationStructureTrianglesOpacityMicromapEXT * opacityGeometryMicromap)137     inline void setOpacityMicromap(const VkAccelerationStructureTrianglesOpacityMicromapEXT *opacityGeometryMicromap)
138     {
139         m_hasOpacityMicromap      = true;
140         m_opacityGeometryMicromap = *opacityGeometryMicromap;
141     }
142     virtual uint32_t getVertexCount(void) const         = 0;
143     virtual const uint8_t *getVertexPointer(void) const = 0;
144     virtual VkDeviceSize getVertexStride(void) const    = 0;
145     virtual VkDeviceSize getAABBStride(void) const      = 0;
146     virtual size_t getVertexByteSize(void) const        = 0;
147     virtual uint32_t getIndexCount(void) const          = 0;
148     virtual const uint8_t *getIndexPointer(void) const  = 0;
149     virtual VkDeviceSize getIndexStride(void) const     = 0;
150     virtual size_t getIndexByteSize(void) const         = 0;
151     virtual uint32_t getPrimitiveCount(void) const      = 0;
152     virtual void addVertex(const tcu::Vec3 &vertex)     = 0;
153     virtual void addIndex(const uint32_t &index)        = 0;
154 
155 private:
156     VkGeometryTypeKHR m_geometryType;
157     VkFormat m_vertexFormat;
158     VkIndexType m_indexType;
159     VkGeometryFlagsKHR m_geometryFlags;
160     bool m_hasOpacityMicromap;
161     VkAccelerationStructureTrianglesOpacityMicromapEXT m_opacityGeometryMicromap;
162 };
163 
164 template <typename T>
convertSatRte(float f)165 inline T convertSatRte(float f)
166 {
167     // \note Doesn't work for 64-bit types
168     DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
169     DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
170 
171     int64_t minVal = std::numeric_limits<T>::min();
172     int64_t maxVal = std::numeric_limits<T>::max();
173     float q        = deFloatFrac(f);
174     int64_t intVal = (int64_t)(f - q);
175 
176     // Rounding.
177     if (q == 0.5f)
178     {
179         if (intVal % 2 != 0)
180             intVal++;
181     }
182     else if (q > 0.5f)
183         intVal++;
184     // else Don't add anything
185 
186     // Saturate.
187     intVal = de::max(minVal, de::min(maxVal, intVal));
188 
189     return (T)intVal;
190 }
191 
192 // Converts float to signed integer with variable width.
193 // Source float is assumed to be in the [-1, 1] range.
194 template <typename T>
deFloat32ToSNorm(float src)195 inline T deFloat32ToSNorm(float src)
196 {
197     DE_STATIC_ASSERT(std::numeric_limits<T>::is_integer && std::numeric_limits<T>::is_signed);
198     const T range  = std::numeric_limits<T>::max();
199     const T intVal = convertSatRte<T>(src * static_cast<float>(range));
200     return de::clamp<T>(intVal, -range, range);
201 }
202 
203 typedef tcu::Vector<deFloat16, 2> Vec2_16;
204 typedef tcu::Vector<deFloat16, 3> Vec3_16;
205 typedef tcu::Vector<deFloat16, 4> Vec4_16;
206 typedef tcu::Vector<int16_t, 2> Vec2_16SNorm;
207 typedef tcu::Vector<int16_t, 3> Vec3_16SNorm;
208 typedef tcu::Vector<int16_t, 4> Vec4_16SNorm;
209 typedef tcu::Vector<int8_t, 2> Vec2_8SNorm;
210 typedef tcu::Vector<int8_t, 3> Vec3_8SNorm;
211 typedef tcu::Vector<int8_t, 4> Vec4_8SNorm;
212 
213 template <typename V>
214 VkFormat vertexFormatFromType();
215 template <>
vertexFormatFromType()216 inline VkFormat vertexFormatFromType<tcu::Vec2>()
217 {
218     return VK_FORMAT_R32G32_SFLOAT;
219 }
220 template <>
vertexFormatFromType()221 inline VkFormat vertexFormatFromType<tcu::Vec3>()
222 {
223     return VK_FORMAT_R32G32B32_SFLOAT;
224 }
225 template <>
vertexFormatFromType()226 inline VkFormat vertexFormatFromType<tcu::Vec4>()
227 {
228     return VK_FORMAT_R32G32B32A32_SFLOAT;
229 }
230 template <>
vertexFormatFromType()231 inline VkFormat vertexFormatFromType<Vec2_16>()
232 {
233     return VK_FORMAT_R16G16_SFLOAT;
234 }
235 template <>
vertexFormatFromType()236 inline VkFormat vertexFormatFromType<Vec3_16>()
237 {
238     return VK_FORMAT_R16G16B16_SFLOAT;
239 }
240 template <>
vertexFormatFromType()241 inline VkFormat vertexFormatFromType<Vec4_16>()
242 {
243     return VK_FORMAT_R16G16B16A16_SFLOAT;
244 }
245 template <>
vertexFormatFromType()246 inline VkFormat vertexFormatFromType<Vec2_16SNorm>()
247 {
248     return VK_FORMAT_R16G16_SNORM;
249 }
250 template <>
vertexFormatFromType()251 inline VkFormat vertexFormatFromType<Vec3_16SNorm>()
252 {
253     return VK_FORMAT_R16G16B16_SNORM;
254 }
255 template <>
vertexFormatFromType()256 inline VkFormat vertexFormatFromType<Vec4_16SNorm>()
257 {
258     return VK_FORMAT_R16G16B16A16_SNORM;
259 }
260 template <>
vertexFormatFromType()261 inline VkFormat vertexFormatFromType<tcu::DVec2>()
262 {
263     return VK_FORMAT_R64G64_SFLOAT;
264 }
265 template <>
vertexFormatFromType()266 inline VkFormat vertexFormatFromType<tcu::DVec3>()
267 {
268     return VK_FORMAT_R64G64B64_SFLOAT;
269 }
270 template <>
vertexFormatFromType()271 inline VkFormat vertexFormatFromType<tcu::DVec4>()
272 {
273     return VK_FORMAT_R64G64B64A64_SFLOAT;
274 }
275 template <>
vertexFormatFromType()276 inline VkFormat vertexFormatFromType<Vec2_8SNorm>()
277 {
278     return VK_FORMAT_R8G8_SNORM;
279 }
280 template <>
vertexFormatFromType()281 inline VkFormat vertexFormatFromType<Vec3_8SNorm>()
282 {
283     return VK_FORMAT_R8G8B8_SNORM;
284 }
285 template <>
vertexFormatFromType()286 inline VkFormat vertexFormatFromType<Vec4_8SNorm>()
287 {
288     return VK_FORMAT_R8G8B8A8_SNORM;
289 }
290 
291 struct EmptyIndex
292 {
293 };
294 template <typename I>
295 VkIndexType indexTypeFromType();
296 template <>
indexTypeFromType()297 inline VkIndexType indexTypeFromType<uint16_t>()
298 {
299     return VK_INDEX_TYPE_UINT16;
300 }
301 template <>
indexTypeFromType()302 inline VkIndexType indexTypeFromType<uint32_t>()
303 {
304     return VK_INDEX_TYPE_UINT32;
305 }
306 template <>
indexTypeFromType()307 inline VkIndexType indexTypeFromType<EmptyIndex>()
308 {
309     return VK_INDEX_TYPE_NONE_KHR;
310 }
311 
312 template <typename V>
313 V convertFloatTo(const tcu::Vec3 &vertex);
314 template <>
convertFloatTo(const tcu::Vec3 & vertex)315 inline tcu::Vec2 convertFloatTo<tcu::Vec2>(const tcu::Vec3 &vertex)
316 {
317     return tcu::Vec2(vertex.x(), vertex.y());
318 }
319 template <>
convertFloatTo(const tcu::Vec3 & vertex)320 inline tcu::Vec3 convertFloatTo<tcu::Vec3>(const tcu::Vec3 &vertex)
321 {
322     return vertex;
323 }
324 template <>
convertFloatTo(const tcu::Vec3 & vertex)325 inline tcu::Vec4 convertFloatTo<tcu::Vec4>(const tcu::Vec3 &vertex)
326 {
327     return tcu::Vec4(vertex.x(), vertex.y(), vertex.z(), 0.0f);
328 }
329 template <>
convertFloatTo(const tcu::Vec3 & vertex)330 inline Vec2_16 convertFloatTo<Vec2_16>(const tcu::Vec3 &vertex)
331 {
332     return Vec2_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()));
333 }
334 template <>
convertFloatTo(const tcu::Vec3 & vertex)335 inline Vec3_16 convertFloatTo<Vec3_16>(const tcu::Vec3 &vertex)
336 {
337     return Vec3_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()), deFloat32To16(vertex.z()));
338 }
339 template <>
convertFloatTo(const tcu::Vec3 & vertex)340 inline Vec4_16 convertFloatTo<Vec4_16>(const tcu::Vec3 &vertex)
341 {
342     return Vec4_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()), deFloat32To16(vertex.z()),
343                    deFloat32To16(0.0f));
344 }
345 template <>
convertFloatTo(const tcu::Vec3 & vertex)346 inline Vec2_16SNorm convertFloatTo<Vec2_16SNorm>(const tcu::Vec3 &vertex)
347 {
348     return Vec2_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()));
349 }
350 template <>
convertFloatTo(const tcu::Vec3 & vertex)351 inline Vec3_16SNorm convertFloatTo<Vec3_16SNorm>(const tcu::Vec3 &vertex)
352 {
353     return Vec3_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()),
354                         deFloat32ToSNorm<int16_t>(vertex.z()));
355 }
356 template <>
convertFloatTo(const tcu::Vec3 & vertex)357 inline Vec4_16SNorm convertFloatTo<Vec4_16SNorm>(const tcu::Vec3 &vertex)
358 {
359     return Vec4_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()),
360                         deFloat32ToSNorm<int16_t>(vertex.z()), deFloat32ToSNorm<int16_t>(0.0f));
361 }
362 template <>
convertFloatTo(const tcu::Vec3 & vertex)363 inline tcu::DVec2 convertFloatTo<tcu::DVec2>(const tcu::Vec3 &vertex)
364 {
365     return tcu::DVec2(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()));
366 }
367 template <>
convertFloatTo(const tcu::Vec3 & vertex)368 inline tcu::DVec3 convertFloatTo<tcu::DVec3>(const tcu::Vec3 &vertex)
369 {
370     return tcu::DVec3(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()),
371                       static_cast<double>(vertex.z()));
372 }
373 template <>
convertFloatTo(const tcu::Vec3 & vertex)374 inline tcu::DVec4 convertFloatTo<tcu::DVec4>(const tcu::Vec3 &vertex)
375 {
376     return tcu::DVec4(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()), static_cast<double>(vertex.z()),
377                       0.0);
378 }
379 template <>
convertFloatTo(const tcu::Vec3 & vertex)380 inline Vec2_8SNorm convertFloatTo<Vec2_8SNorm>(const tcu::Vec3 &vertex)
381 {
382     return Vec2_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()));
383 }
384 template <>
convertFloatTo(const tcu::Vec3 & vertex)385 inline Vec3_8SNorm convertFloatTo<Vec3_8SNorm>(const tcu::Vec3 &vertex)
386 {
387     return Vec3_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()),
388                        deFloat32ToSNorm<int8_t>(vertex.z()));
389 }
390 template <>
convertFloatTo(const tcu::Vec3 & vertex)391 inline Vec4_8SNorm convertFloatTo<Vec4_8SNorm>(const tcu::Vec3 &vertex)
392 {
393     return Vec4_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()),
394                        deFloat32ToSNorm<int8_t>(vertex.z()), deFloat32ToSNorm<int8_t>(0.0f));
395 }
396 
397 template <typename V>
398 V convertIndexTo(uint32_t index);
399 template <>
convertIndexTo(uint32_t index)400 inline EmptyIndex convertIndexTo<EmptyIndex>(uint32_t index)
401 {
402     DE_UNREF(index);
403     TCU_THROW(TestError, "Cannot add empty index");
404 }
405 template <>
convertIndexTo(uint32_t index)406 inline uint16_t convertIndexTo<uint16_t>(uint32_t index)
407 {
408     return static_cast<uint16_t>(index);
409 }
410 template <>
convertIndexTo(uint32_t index)411 inline uint32_t convertIndexTo<uint32_t>(uint32_t index)
412 {
413     return index;
414 }
415 
416 template <typename V, typename I>
417 class RaytracedGeometry : public RaytracedGeometryBase
418 {
419 public:
420     RaytracedGeometry()                                  = delete;
421     RaytracedGeometry(const RaytracedGeometry &geometry) = delete;
422     RaytracedGeometry(VkGeometryTypeKHR geometryType, uint32_t paddingBlocks = 0u);
423     RaytracedGeometry(VkGeometryTypeKHR geometryType, const std::vector<V> &vertices,
424                       const std::vector<I> &indices = std::vector<I>(), uint32_t paddingBlocks = 0u);
425 
426     uint32_t getVertexCount(void) const override;
427     const uint8_t *getVertexPointer(void) const override;
428     VkDeviceSize getVertexStride(void) const override;
429     VkDeviceSize getAABBStride(void) const override;
430     size_t getVertexByteSize(void) const override;
431     uint32_t getIndexCount(void) const override;
432     const uint8_t *getIndexPointer(void) const override;
433     VkDeviceSize getIndexStride(void) const override;
434     size_t getIndexByteSize(void) const override;
435     uint32_t getPrimitiveCount(void) const override;
436 
437     void addVertex(const tcu::Vec3 &vertex) override;
438     void addIndex(const uint32_t &index) override;
439 
440 private:
441     void init();                           // To be run in constructors.
442     void checkGeometryType() const;        // Checks geometry type is valid.
443     void calcBlockSize();                  // Calculates and saves vertex buffer block size.
444     size_t getBlockSize() const;           // Return stored vertex buffer block size.
445     void addNativeVertex(const V &vertex); // Adds new vertex in native format.
446 
447     // The implementation below stores vertices as byte blocks to take the requested padding into account. m_vertices is the array
448     // of bytes containing vertex data.
449     //
450     // For triangles, the padding block has a size that is a multiple of the vertex size and each vertex is stored in a byte block
451     // equivalent to:
452     //
453     //    struct Vertex
454     //    {
455     // V vertex;
456     // uint8_t padding[m_paddingBlocks * sizeof(V)];
457     // };
458     //
459     // For AABBs, the padding block has a size that is a multiple of kAABBPadBaseSize (see below) and vertices are stored in pairs
460     // before the padding block. This is equivalent to:
461     //
462     //        struct VertexPair
463     //        {
464     // V vertices[2];
465     // uint8_t padding[m_paddingBlocks * kAABBPadBaseSize];
466     // };
467     //
468     // The size of each pseudo-structure above is saved to one of the correspoding union members below.
469     union BlockSize
470     {
471         size_t trianglesBlockSize;
472         size_t aabbsBlockSize;
473     };
474 
475     const uint32_t m_paddingBlocks;
476     size_t m_vertexCount;
477     std::vector<uint8_t> m_vertices; // Vertices are stored as byte blocks.
478     std::vector<I> m_indices;        // Indices are stored natively.
479     BlockSize m_blockSize;           // For m_vertices.
480 
481     // Data sizes.
482     static constexpr size_t kVertexSize      = sizeof(V);
483     static constexpr size_t kIndexSize       = sizeof(I);
484     static constexpr size_t kAABBPadBaseSize = 8; // As required by the spec.
485 };
486 
487 template <typename V, typename I>
RaytracedGeometry(VkGeometryTypeKHR geometryType,uint32_t paddingBlocks)488 RaytracedGeometry<V, I>::RaytracedGeometry(VkGeometryTypeKHR geometryType, uint32_t paddingBlocks)
489     : RaytracedGeometryBase(geometryType, vertexFormatFromType<V>(), indexTypeFromType<I>())
490     , m_paddingBlocks(paddingBlocks)
491     , m_vertexCount(0)
492 {
493     init();
494 }
495 
496 template <typename V, typename I>
RaytracedGeometry(VkGeometryTypeKHR geometryType,const std::vector<V> & vertices,const std::vector<I> & indices,uint32_t paddingBlocks)497 RaytracedGeometry<V, I>::RaytracedGeometry(VkGeometryTypeKHR geometryType, const std::vector<V> &vertices,
498                                            const std::vector<I> &indices, uint32_t paddingBlocks)
499     : RaytracedGeometryBase(geometryType, vertexFormatFromType<V>(), indexTypeFromType<I>())
500     , m_paddingBlocks(paddingBlocks)
501     , m_vertexCount(0)
502     , m_vertices()
503     , m_indices(indices)
504 {
505     init();
506     for (const auto &vertex : vertices)
507         addNativeVertex(vertex);
508 }
509 
510 template <typename V, typename I>
getVertexCount(void) const511 uint32_t RaytracedGeometry<V, I>::getVertexCount(void) const
512 {
513     return (isTrianglesType() ? static_cast<uint32_t>(m_vertexCount) : 0u);
514 }
515 
516 template <typename V, typename I>
getVertexPointer(void) const517 const uint8_t *RaytracedGeometry<V, I>::getVertexPointer(void) const
518 {
519     DE_ASSERT(!m_vertices.empty());
520     return reinterpret_cast<const uint8_t *>(m_vertices.data());
521 }
522 
523 template <typename V, typename I>
getVertexStride(void) const524 VkDeviceSize RaytracedGeometry<V, I>::getVertexStride(void) const
525 {
526     return ((!isTrianglesType()) ? 0ull : static_cast<VkDeviceSize>(getBlockSize()));
527 }
528 
529 template <typename V, typename I>
getAABBStride(void) const530 VkDeviceSize RaytracedGeometry<V, I>::getAABBStride(void) const
531 {
532     return (isTrianglesType() ? 0ull : static_cast<VkDeviceSize>(getBlockSize()));
533 }
534 
535 template <typename V, typename I>
getVertexByteSize(void) const536 size_t RaytracedGeometry<V, I>::getVertexByteSize(void) const
537 {
538     return m_vertices.size();
539 }
540 
541 template <typename V, typename I>
getIndexCount(void) const542 uint32_t RaytracedGeometry<V, I>::getIndexCount(void) const
543 {
544     return static_cast<uint32_t>(isTrianglesType() ? m_indices.size() : 0);
545 }
546 
547 template <typename V, typename I>
getIndexPointer(void) const548 const uint8_t *RaytracedGeometry<V, I>::getIndexPointer(void) const
549 {
550     const auto indexCount = getIndexCount();
551     DE_UNREF(indexCount); // For release builds.
552     DE_ASSERT(indexCount > 0u);
553 
554     return reinterpret_cast<const uint8_t *>(m_indices.data());
555 }
556 
557 template <typename V, typename I>
getIndexStride(void) const558 VkDeviceSize RaytracedGeometry<V, I>::getIndexStride(void) const
559 {
560     return static_cast<VkDeviceSize>(kIndexSize);
561 }
562 
563 template <typename V, typename I>
getIndexByteSize(void) const564 size_t RaytracedGeometry<V, I>::getIndexByteSize(void) const
565 {
566     const auto indexCount = getIndexCount();
567     DE_ASSERT(indexCount > 0u);
568 
569     return (indexCount * kIndexSize);
570 }
571 
572 template <typename V, typename I>
getPrimitiveCount(void) const573 uint32_t RaytracedGeometry<V, I>::getPrimitiveCount(void) const
574 {
575     return static_cast<uint32_t>(isTrianglesType() ? (usesIndices() ? m_indices.size() / 3 : m_vertexCount / 3) :
576                                                      (m_vertexCount / 2));
577 }
578 
579 template <typename V, typename I>
addVertex(const tcu::Vec3 & vertex)580 void RaytracedGeometry<V, I>::addVertex(const tcu::Vec3 &vertex)
581 {
582     addNativeVertex(convertFloatTo<V>(vertex));
583 }
584 
585 template <typename V, typename I>
addNativeVertex(const V & vertex)586 void RaytracedGeometry<V, I>::addNativeVertex(const V &vertex)
587 {
588     const auto oldSize   = m_vertices.size();
589     const auto blockSize = getBlockSize();
590 
591     if (isTrianglesType())
592     {
593         // Reserve new block, copy vertex at the beginning of the new block.
594         m_vertices.resize(oldSize + blockSize, uint8_t{0});
595         deMemcpy(&m_vertices[oldSize], &vertex, kVertexSize);
596     }
597     else // AABB
598     {
599         if (m_vertexCount % 2 == 0)
600         {
601             // New block needed.
602             m_vertices.resize(oldSize + blockSize, uint8_t{0});
603             deMemcpy(&m_vertices[oldSize], &vertex, kVertexSize);
604         }
605         else
606         {
607             // Insert in the second position of last existing block.
608             //
609             //                                                Vertex Size
610             //                                                +-------+
611             //    +-------------+------------+----------------------------------------+
612             //    |             |            |      ...       | vertex vertex padding |
613             //    +-------------+------------+----------------+-----------------------+
614             //                                                +-----------------------+
615             //                                                        Block Size
616             //    +-------------------------------------------------------------------+
617             //                            Old Size
618             //
619             deMemcpy(&m_vertices[oldSize - blockSize + kVertexSize], &vertex, kVertexSize);
620         }
621     }
622 
623     ++m_vertexCount;
624 }
625 
626 template <typename V, typename I>
addIndex(const uint32_t & index)627 void RaytracedGeometry<V, I>::addIndex(const uint32_t &index)
628 {
629     m_indices.push_back(convertIndexTo<I>(index));
630 }
631 
632 template <typename V, typename I>
init()633 void RaytracedGeometry<V, I>::init()
634 {
635     checkGeometryType();
636     calcBlockSize();
637 }
638 
639 template <typename V, typename I>
checkGeometryType() const640 void RaytracedGeometry<V, I>::checkGeometryType() const
641 {
642     const auto geometryType = getGeometryType();
643     DE_UNREF(geometryType); // For release builds.
644     DE_ASSERT(geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR || geometryType == VK_GEOMETRY_TYPE_AABBS_KHR);
645 }
646 
647 template <typename V, typename I>
calcBlockSize()648 void RaytracedGeometry<V, I>::calcBlockSize()
649 {
650     if (isTrianglesType())
651         m_blockSize.trianglesBlockSize = kVertexSize * static_cast<size_t>(1u + m_paddingBlocks);
652     else
653         m_blockSize.aabbsBlockSize = 2 * kVertexSize + m_paddingBlocks * kAABBPadBaseSize;
654 }
655 
656 template <typename V, typename I>
getBlockSize() const657 size_t RaytracedGeometry<V, I>::getBlockSize() const
658 {
659     return (isTrianglesType() ? m_blockSize.trianglesBlockSize : m_blockSize.aabbsBlockSize);
660 }
661 
662 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry(VkGeometryTypeKHR geometryType, VkFormat vertexFormat,
663                                                            VkIndexType indexType, bool padVertices = false);
664 
665 VkDeviceAddress getBufferDeviceAddress(const DeviceInterface &vkd, const VkDevice device, const VkBuffer buffer,
666                                        VkDeviceSize offset);
667 
668 // type used for creating a deep serialization/deserialization of top-level acceleration structures
669 class SerialInfo
670 {
671     std::vector<uint64_t> m_addresses;
672     std::vector<VkDeviceSize> m_sizes;
673 
674 public:
675     SerialInfo() = default;
676 
677     // addresses: { (owner-top-level AS address) [, (first bottom_level AS address), (second bottom_level AS address), ...] }
678     // sizes:     { (owner-top-level AS serial size) [, (first bottom_level AS serial size), (second bottom_level AS serial size), ...] }
SerialInfo(const std::vector<uint64_t> & addresses,const std::vector<VkDeviceSize> & sizes)679     SerialInfo(const std::vector<uint64_t> &addresses, const std::vector<VkDeviceSize> &sizes)
680         : m_addresses(addresses)
681         , m_sizes(sizes)
682     {
683         DE_ASSERT(!addresses.empty() && addresses.size() == sizes.size());
684     }
685 
addresses() const686     const std::vector<uint64_t> &addresses() const
687     {
688         return m_addresses;
689     }
sizes() const690     const std::vector<VkDeviceSize> &sizes() const
691     {
692         return m_sizes;
693     }
694 };
695 
696 class SerialStorage
697 {
698 public:
699     enum
700     {
701         DE_SERIALIZED_FIELD(
702             DRIVER_UUID, VK_UUID_SIZE), // VK_UUID_SIZE bytes of data matching VkPhysicalDeviceIDProperties::driverUUID
703         DE_SERIALIZED_FIELD(
704             COMPAT_UUID,
705             VK_UUID_SIZE), // VK_UUID_SIZE bytes of data identifying the compatibility for comparison using vkGetDeviceAccelerationStructureCompatibilityKHR
706         DE_SERIALIZED_FIELD(
707             SERIALIZED_SIZE,
708             sizeof(
709                 uint64_t)), // A 64-bit integer of the total size matching the value queried using VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR
710         DE_SERIALIZED_FIELD(
711             DESERIALIZED_SIZE,
712             sizeof(
713                 uint64_t)), // A 64-bit integer of the deserialized size to be passed in to VkAccelerationStructureCreateInfoKHR::size
714         DE_SERIALIZED_FIELD(
715             HANDLES_COUNT,
716             sizeof(
717                 uint64_t)), // A 64-bit integer of the count of the number of acceleration structure handles following. This will be zero for a bottom-level acceleration structure.
718         SERIAL_STORAGE_SIZE_MIN
719     };
720 
721     // An old fashion C-style structure that simplifies an access to the AS header
722     struct alignas(16) AccelerationStructureHeader
723     {
724         union
725         {
726             struct
727             {
728                 uint8_t driverUUID[VK_UUID_SIZE];
729                 uint8_t compactUUID[VK_UUID_SIZE];
730             };
731             uint8_t uuids[VK_UUID_SIZE * 2];
732         };
733         uint64_t serializedSize;
734         uint64_t deserializedSize;
735         uint64_t handleCount;
736         VkDeviceAddress handleArray[1];
737     };
738 
739     SerialStorage() = delete;
740     SerialStorage(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
741                   const VkAccelerationStructureBuildTypeKHR buildType, const VkDeviceSize storageSize);
742     // An additional constructor for creating a deep copy of top-level AS's.
743     SerialStorage(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
744                   const VkAccelerationStructureBuildTypeKHR buildType, const SerialInfo &SerialInfo);
745 
746     // below methods will return host addres if AS was build on cpu and device addres when it was build on gpu
747     VkDeviceOrHostAddressKHR getAddress(const DeviceInterface &vk, const VkDevice device,
748                                         const VkAccelerationStructureBuildTypeKHR buildType);
749     VkDeviceOrHostAddressConstKHR getAddressConst(const DeviceInterface &vk, const VkDevice device,
750                                                   const VkAccelerationStructureBuildTypeKHR buildType);
751 
752     // this methods retun host address regardless of where AS was built
753     VkDeviceOrHostAddressKHR getHostAddress(VkDeviceSize offset = 0);
754     VkDeviceOrHostAddressConstKHR getHostAddressConst(VkDeviceSize offset = 0);
755 
756     // works the similar way as getHostAddressConst() but returns more readable/intuitive object
757     AccelerationStructureHeader *getASHeader();
758     bool hasDeepFormat() const;
759     de::SharedPtr<SerialStorage> getBottomStorage(uint32_t index) const;
760 
761     VkDeviceSize getStorageSize() const;
762     const SerialInfo &getSerialInfo() const;
763     uint64_t getDeserializedSize();
764 
765 protected:
766     const VkAccelerationStructureBuildTypeKHR m_buildType;
767     const VkDeviceSize m_storageSize;
768     const SerialInfo m_serialInfo;
769     de::MovePtr<BufferWithMemory> m_buffer;
770     std::vector<de::SharedPtr<SerialStorage>> m_bottoms;
771 };
772 
773 class BottomLevelAccelerationStructure
774 {
775 public:
776     static uint32_t getRequiredAllocationCount(void);
777 
778     BottomLevelAccelerationStructure();
779     BottomLevelAccelerationStructure(const BottomLevelAccelerationStructure &other) = delete;
780     virtual ~BottomLevelAccelerationStructure();
781 
782     virtual void setGeometryData(const std::vector<tcu::Vec3> &geometryData, const bool triangles,
783                                  const VkGeometryFlagsKHR geometryFlags = 0u);
784     virtual void setDefaultGeometryData(const VkShaderStageFlagBits testStage,
785                                         const VkGeometryFlagsKHR geometryFlags = 0u);
786     virtual void setGeometryCount(const size_t geometryCount);
787     virtual void addGeometry(de::SharedPtr<RaytracedGeometryBase> &raytracedGeometry);
788     virtual void addGeometry(
789         const std::vector<tcu::Vec3> &geometryData, const bool triangles, const VkGeometryFlagsKHR geometryFlags = 0u,
790         const VkAccelerationStructureTrianglesOpacityMicromapEXT *opacityGeometryMicromap = DE_NULL);
791 
792     virtual void setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)                         = DE_NULL;
793     virtual VkAccelerationStructureBuildTypeKHR getBuildType() const                                       = 0;
794     virtual void setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)                   = DE_NULL;
795     virtual void setCreateGeneric(bool createGeneric)                                                      = 0;
796     virtual void setCreationBufferUnbounded(bool creationBufferUnbounded)                                  = 0;
797     virtual void setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)                      = DE_NULL;
798     virtual void setBuildWithoutGeometries(bool buildWithoutGeometries)                                    = 0;
799     virtual void setBuildWithoutPrimitives(bool buildWithoutPrimitives)                                    = 0;
800     virtual void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0u) = DE_NULL;
801     virtual void setUseArrayOfPointers(const bool useArrayOfPointers)                                      = DE_NULL;
802     virtual void setUseMaintenance5(const bool useMaintenance5)                                            = DE_NULL;
803     virtual void setIndirectBuildParameters(const VkBuffer indirectBuffer, const VkDeviceSize indirectBufferOffset,
804                                             const uint32_t indirectBufferStride)                           = DE_NULL;
805     virtual VkBuildAccelerationStructureFlagsKHR getBuildFlags() const                                     = DE_NULL;
806     VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const;
807 
808     // methods specific for each acceleration structure
809     virtual void create(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
810                         VkDeviceSize structureSize, VkDeviceAddress deviceAddress = 0u, const void *pNext = DE_NULL,
811                         const MemoryRequirement &addMemoryRequirement = MemoryRequirement::Any,
812                         const VkBuffer creationBuffer                 = VK_NULL_HANDLE,
813                         const VkDeviceSize creationBufferSize         = 0u)                                  = DE_NULL;
814     virtual void build(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
815                        BottomLevelAccelerationStructure *srcAccelerationStructure = DE_NULL)         = DE_NULL;
816     virtual void copyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
817                           BottomLevelAccelerationStructure *accelerationStructure, bool compactCopy) = DE_NULL;
818 
819     virtual void serialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
820                            SerialStorage *storage)   = DE_NULL;
821     virtual void deserialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
822                              SerialStorage *storage) = DE_NULL;
823 
824     // helper methods for typical acceleration structure creation tasks
825     void createAndBuild(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
826                         Allocator &allocator, VkDeviceAddress deviceAddress = 0u);
827     void createAndCopyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
828                            Allocator &allocator, BottomLevelAccelerationStructure *accelerationStructure,
829                            VkDeviceSize compactCopySize = 0u, VkDeviceAddress deviceAddress = 0u);
830     void createAndDeserializeFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
831                                   Allocator &allocator, SerialStorage *storage, VkDeviceAddress deviceAddress = 0u);
832     virtual const VkAccelerationStructureKHR *getPtr(void) const                         = DE_NULL;
833     virtual void updateGeometry(size_t geometryIndex,
834                                 de::SharedPtr<RaytracedGeometryBase> &raytracedGeometry) = DE_NULL;
835 
836 protected:
837     std::vector<de::SharedPtr<RaytracedGeometryBase>> m_geometriesData;
838     VkDeviceSize m_structureSize;
839     VkDeviceSize m_updateScratchSize;
840     VkDeviceSize m_buildScratchSize;
841 };
842 
843 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure();
844 
845 /**
846  * @brief Implements a pool of BottomLevelAccelerationStructure
847  */
848 class BottomLevelAccelerationStructurePool
849 {
850 public:
851     typedef de::SharedPtr<BottomLevelAccelerationStructure> BlasPtr;
852     struct BlasInfo
853     {
854         VkDeviceSize structureSize;
855         VkDeviceAddress deviceAddress;
856     };
857 
858     BottomLevelAccelerationStructurePool();
859     virtual ~BottomLevelAccelerationStructurePool();
860 
at(uint32_t index) const861     BlasPtr at(uint32_t index) const
862     {
863         return m_structs[index];
864     }
operator [](uint32_t index) const865     BlasPtr operator[](uint32_t index) const
866     {
867         return m_structs[index];
868     }
structures() const869     auto structures() const -> const std::vector<BlasPtr> &
870     {
871         return m_structs;
872     }
structCount() const873     uint32_t structCount() const
874     {
875         return static_cast<uint32_t>(m_structs.size());
876     }
877 
878     // defines how many structures will be packet in single buffer
batchStructCount() const879     uint32_t batchStructCount() const
880     {
881         return m_batchStructCount;
882     }
883     void batchStructCount(const uint32_t &value);
884 
885     // defines how many geometries (vertices and/or indices) will be packet in single buffer
batchGeomCount() const886     uint32_t batchGeomCount() const
887     {
888         return m_batchGeomCount;
889     }
batchGeomCount(const uint32_t & value)890     void batchGeomCount(const uint32_t &value)
891     {
892         m_batchGeomCount = value;
893     }
894 
tryCachedMemory() const895     bool tryCachedMemory() const
896     {
897         return m_tryCachedMemory;
898     }
tryCachedMemory(const bool cachedMemory)899     void tryCachedMemory(const bool cachedMemory)
900     {
901         m_tryCachedMemory = cachedMemory;
902     }
903 
904     BlasPtr add(VkDeviceSize structureSize = 0, VkDeviceAddress deviceAddress = 0);
905     /**
906      * @brief Creates previously added bottoms at a time.
907      * @note  All geometries must be known before call this method.
908      */
909     void batchCreate(const DeviceInterface &vkd, const VkDevice device, Allocator &allocator);
910     void batchCreateAdjust(const DeviceInterface &vkd, const VkDevice device, Allocator &allocator,
911                            const VkDeviceSize maxBufferSize);
912     void batchBuild(const DeviceInterface &vk, const VkDevice device, VkCommandBuffer cmdBuffer);
913     void batchBuild(const DeviceInterface &vk, const VkDevice device, VkCommandPool cmdPool, VkQueue queue,
914                     qpWatchDog *watchDog);
915     size_t getAllocationCount() const;
916     size_t getAllocationCount(const DeviceInterface &vk, const VkDevice device, const VkDeviceSize maxBufferSize) const;
917     auto getAllocationSizes(const DeviceInterface &vk, // (strBuff, scratchBuff, vertBuff, indexBuff)
918                             const VkDevice device) const -> tcu::Vector<VkDeviceSize, 4>;
919 
920 protected:
921     uint32_t m_batchStructCount; // default is 4
922     uint32_t m_batchGeomCount;   // default is 0, if zero then batchStructCount is used
923     std::vector<BlasInfo> m_infos;
924     std::vector<BlasPtr> m_structs;
925     bool m_createOnce;
926     bool m_tryCachedMemory;
927     VkDeviceSize m_structsBuffSize;
928     VkDeviceSize m_updatesScratchSize;
929     VkDeviceSize m_buildsScratchSize;
930     VkDeviceSize m_verticesSize;
931     VkDeviceSize m_indicesSize;
932 
933 protected:
934     struct Impl;
935     Impl *m_impl;
936 };
937 
938 struct InstanceData
939 {
InstanceDatavk::InstanceData940     InstanceData(VkTransformMatrixKHR matrix_, uint32_t instanceCustomIndex_, uint32_t mask_,
941                  uint32_t instanceShaderBindingTableRecordOffset_, VkGeometryInstanceFlagsKHR flags_)
942         : matrix(matrix_)
943         , instanceCustomIndex(instanceCustomIndex_)
944         , mask(mask_)
945         , instanceShaderBindingTableRecordOffset(instanceShaderBindingTableRecordOffset_)
946         , flags(flags_)
947     {
948     }
949     VkTransformMatrixKHR matrix;
950     uint32_t instanceCustomIndex;
951     uint32_t mask;
952     uint32_t instanceShaderBindingTableRecordOffset;
953     VkGeometryInstanceFlagsKHR flags;
954 };
955 
956 class TopLevelAccelerationStructure
957 {
958 public:
959     struct CreationSizes
960     {
961         VkDeviceSize structure;
962         VkDeviceSize updateScratch;
963         VkDeviceSize buildScratch;
964         VkDeviceSize instancePointers;
965         VkDeviceSize instancesBuffer;
966         VkDeviceSize sum() const;
967     };
968 
969     static uint32_t getRequiredAllocationCount(void);
970 
971     TopLevelAccelerationStructure();
972     TopLevelAccelerationStructure(const TopLevelAccelerationStructure &other) = delete;
973     virtual ~TopLevelAccelerationStructure();
974 
975     virtual void setInstanceCount(const size_t instanceCount);
976     virtual void addInstance(de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,
977                              const VkTransformMatrixKHR &matrix = identityMatrix3x4, uint32_t instanceCustomIndex = 0,
978                              uint32_t mask = 0xFF, uint32_t instanceShaderBindingTableRecordOffset = 0,
979                              VkGeometryInstanceFlagsKHR flags = VkGeometryInstanceFlagBitsKHR(0u));
980 
981     virtual void setBuildType(const VkAccelerationStructureBuildTypeKHR buildType)                         = DE_NULL;
982     virtual void setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags)                   = DE_NULL;
983     virtual void setCreateGeneric(bool createGeneric)                                                      = 0;
984     virtual void setCreationBufferUnbounded(bool creationBufferUnbounded)                                  = 0;
985     virtual void setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags)                      = DE_NULL;
986     virtual void setBuildWithoutPrimitives(bool buildWithoutPrimitives)                                    = 0;
987     virtual void setInactiveInstances(bool inactiveInstances)                                              = 0;
988     virtual void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0u) = DE_NULL;
989     virtual void setUseArrayOfPointers(const bool useArrayOfPointers)                                      = DE_NULL;
990     virtual void setIndirectBuildParameters(const VkBuffer indirectBuffer, const VkDeviceSize indirectBufferOffset,
991                                             const uint32_t indirectBufferStride)                           = DE_NULL;
992     virtual void setUsePPGeometries(const bool usePPGeometries)                                            = 0;
993     virtual void setTryCachedMemory(const bool tryCachedMemory)                                            = 0;
994     virtual VkBuildAccelerationStructureFlagsKHR getBuildFlags() const                                     = DE_NULL;
995     VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const;
996 
997     // methods specific for each acceleration structure
998     virtual void getCreationSizes(const DeviceInterface &vk, const VkDevice device, const VkDeviceSize structureSize,
999                                   CreationSizes &sizes)                                           = 0;
1000     virtual void create(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
1001                         VkDeviceSize structureSize = 0u, VkDeviceAddress deviceAddress = 0u,
1002                         const void *pNext                             = DE_NULL,
1003                         const MemoryRequirement &addMemoryRequirement = MemoryRequirement::Any,
1004                         const VkBuffer creationBuffer                 = VK_NULL_HANDLE,
1005                         const VkDeviceSize creationBufferSize         = 0u)                               = DE_NULL;
1006     virtual void build(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1007                        TopLevelAccelerationStructure *srcAccelerationStructure = DE_NULL)         = DE_NULL;
1008     virtual void copyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1009                           TopLevelAccelerationStructure *accelerationStructure, bool compactCopy) = DE_NULL;
1010 
1011     virtual void serialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1012                            SerialStorage *storage)   = DE_NULL;
1013     virtual void deserialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1014                              SerialStorage *storage) = DE_NULL;
1015 
1016     virtual std::vector<VkDeviceSize> getSerializingSizes(const DeviceInterface &vk, const VkDevice device,
1017                                                           const VkQueue queue,
1018                                                           const uint32_t queueFamilyIndex) = DE_NULL;
1019 
1020     virtual std::vector<uint64_t> getSerializingAddresses(const DeviceInterface &vk,
1021                                                           const VkDevice device) const = DE_NULL;
1022 
1023     // helper methods for typical acceleration structure creation tasks
1024     void createAndBuild(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1025                         Allocator &allocator, VkDeviceAddress deviceAddress = 0u);
1026     void createAndCopyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1027                            Allocator &allocator, TopLevelAccelerationStructure *accelerationStructure,
1028                            VkDeviceSize compactCopySize = 0u, VkDeviceAddress deviceAddress = 0u);
1029     void createAndDeserializeFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1030                                   Allocator &allocator, SerialStorage *storage, VkDeviceAddress deviceAddress = 0u);
1031 
1032     virtual const VkAccelerationStructureKHR *getPtr(void) const = DE_NULL;
1033 
1034     virtual void updateInstanceMatrix(const DeviceInterface &vk, const VkDevice device, size_t instanceIndex,
1035                                       const VkTransformMatrixKHR &matrix) = 0;
1036 
1037 protected:
1038     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> m_bottomLevelInstances;
1039     std::vector<InstanceData> m_instanceData;
1040     VkDeviceSize m_structureSize;
1041     VkDeviceSize m_updateScratchSize;
1042     VkDeviceSize m_buildScratchSize;
1043 
1044     virtual void createAndDeserializeBottoms(const DeviceInterface &vk, const VkDevice device,
1045                                              const VkCommandBuffer cmdBuffer, Allocator &allocator,
1046                                              SerialStorage *storage) = DE_NULL;
1047 };
1048 
1049 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure();
1050 
1051 template <class ASType>
1052 de::MovePtr<ASType> makeAccelerationStructure();
1053 template <>
makeAccelerationStructure()1054 inline de::MovePtr<BottomLevelAccelerationStructure> makeAccelerationStructure()
1055 {
1056     return makeBottomLevelAccelerationStructure();
1057 }
1058 template <>
makeAccelerationStructure()1059 inline de::MovePtr<TopLevelAccelerationStructure> makeAccelerationStructure()
1060 {
1061     return makeTopLevelAccelerationStructure();
1062 }
1063 
1064 bool queryAccelerationStructureSize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1065                                     const std::vector<VkAccelerationStructureKHR> &accelerationStructureHandles,
1066                                     VkAccelerationStructureBuildTypeKHR buildType, const VkQueryPool queryPool,
1067                                     VkQueryType queryType, uint32_t firstQuery, std::vector<VkDeviceSize> &results);
1068 
1069 class RayTracingPipeline
1070 {
1071 public:
1072     class CompileRequiredError : public std::runtime_error
1073     {
1074     public:
CompileRequiredError(const std::string & error)1075         CompileRequiredError(const std::string &error) : std::runtime_error(error)
1076         {
1077         }
1078     };
1079 
1080     RayTracingPipeline();
1081     ~RayTracingPipeline();
1082 
1083     void addShader(VkShaderStageFlagBits shaderStage, Move<VkShaderModule> shaderModule, uint32_t group,
1084                    const VkSpecializationInfo *specializationInfo = nullptr,
1085                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1086                        static_cast<VkPipelineShaderStageCreateFlags>(0),
1087                    const void *pipelineShaderStageCreateInfopNext = nullptr);
1088     void addShader(VkShaderStageFlagBits shaderStage, de::SharedPtr<Move<VkShaderModule>> shaderModule, uint32_t group,
1089                    const VkSpecializationInfo *specializationInfoPtr = nullptr,
1090                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1091                        static_cast<VkPipelineShaderStageCreateFlags>(0),
1092                    const void *pipelineShaderStageCreateInfopNext = nullptr);
1093     void addShader(VkShaderStageFlagBits shaderStage, VkShaderModule shaderModule, uint32_t group,
1094                    const VkSpecializationInfo *specializationInfo = nullptr,
1095                    const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1096                        static_cast<VkPipelineShaderStageCreateFlags>(0),
1097                    const void *pipelineShaderStageCreateInfopNext = nullptr);
1098     void setGroupCaptureReplayHandle(uint32_t group, const void *pShaderGroupCaptureReplayHandle);
1099     void addLibrary(de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary);
1100     uint32_t getShaderGroupCount(void);     // This pipeline only.
1101     uint32_t getFullShaderGroupCount(void); // This pipeline and its included pipeline libraries, recursively.
1102     Move<VkPipeline> createPipeline(const DeviceInterface &vk, const VkDevice device,
1103                                     const VkPipelineLayout pipelineLayout,
1104                                     const std::vector<de::SharedPtr<Move<VkPipeline>>> &pipelineLibraries =
1105                                         std::vector<de::SharedPtr<Move<VkPipeline>>>());
1106     Move<VkPipeline> createPipeline(const DeviceInterface &vk, const VkDevice device,
1107                                     const VkPipelineLayout pipelineLayout,
1108                                     const std::vector<VkPipeline> &pipelineLibraries,
1109                                     const VkPipelineCache pipelineCache);
1110     std::vector<de::SharedPtr<Move<VkPipeline>>> createPipelineWithLibraries(const DeviceInterface &vk,
1111                                                                              const VkDevice device,
1112                                                                              const VkPipelineLayout pipelineLayout);
1113     std::vector<uint8_t> getShaderGroupHandles(const DeviceInterface &vk, const VkDevice device,
1114                                                const VkPipeline pipeline, const uint32_t shaderGroupHandleSize,
1115                                                const uint32_t firstGroup, const uint32_t groupCount) const;
1116     std::vector<uint8_t> getShaderGroupReplayHandles(const DeviceInterface &vk, const VkDevice device,
1117                                                      const VkPipeline pipeline,
1118                                                      const uint32_t shaderGroupHandleReplaySize,
1119                                                      const uint32_t firstGroup, const uint32_t groupCount) const;
1120     de::MovePtr<BufferWithMemory> createShaderBindingTable(
1121         const DeviceInterface &vk, const VkDevice device, const VkPipeline pipeline, Allocator &allocator,
1122         const uint32_t &shaderGroupHandleSize, const uint32_t shaderGroupBaseAlignment, const uint32_t &firstGroup,
1123         const uint32_t &groupCount, const VkBufferCreateFlags &additionalBufferCreateFlags = VkBufferCreateFlags(0u),
1124         const VkBufferUsageFlags &additionalBufferUsageFlags = VkBufferUsageFlags(0u),
1125         const MemoryRequirement &additionalMemoryRequirement = MemoryRequirement::Any,
1126         const VkDeviceAddress &opaqueCaptureAddress = 0u, const uint32_t shaderBindingTableOffset = 0u,
1127         const uint32_t shaderRecordSize = 0u, const void **shaderGroupDataPtrPerGroup = nullptr,
1128         const bool autoAlignRecords = true);
1129     de::MovePtr<BufferWithMemory> createShaderBindingTable(
1130         const DeviceInterface &vk, const VkDevice device, Allocator &allocator, const uint32_t shaderGroupHandleSize,
1131         const uint32_t shaderGroupBaseAlignment, const std::vector<uint8_t> &shaderHandles,
1132         const VkBufferCreateFlags additionalBufferCreateFlags = VkBufferCreateFlags(0u),
1133         const VkBufferUsageFlags additionalBufferUsageFlags   = VkBufferUsageFlags(0u),
1134         const MemoryRequirement &additionalMemoryRequirement  = MemoryRequirement::Any,
1135         const VkDeviceAddress opaqueCaptureAddress = 0u, const uint32_t shaderBindingTableOffset = 0u,
1136         const uint32_t shaderRecordSize = 0u, const void **shaderGroupDataPtrPerGroup = nullptr,
1137         const bool autoAlignRecords = true);
1138     void setCreateFlags(const VkPipelineCreateFlags &pipelineCreateFlags);
1139     void setCreateFlags2(const VkPipelineCreateFlags2KHR &pipelineCreateFlags2);
1140     void setMaxRecursionDepth(const uint32_t &maxRecursionDepth);
1141     void setMaxPayloadSize(const uint32_t &maxPayloadSize);
1142     void setMaxAttributeSize(const uint32_t &maxAttributeSize);
1143     void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0);
1144     void addDynamicState(const VkDynamicState &dynamicState);
1145 
1146 protected:
1147     Move<VkPipeline> createPipelineKHR(const DeviceInterface &vk, const VkDevice device,
1148                                        const VkPipelineLayout pipelineLayout,
1149                                        const std::vector<VkPipeline> &pipelineLibraries,
1150                                        const VkPipelineCache pipelineCache = DE_NULL);
1151 
1152     std::vector<de::SharedPtr<Move<VkShaderModule>>> m_shadersModules;
1153     std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> m_pipelineLibraries;
1154     std::vector<VkPipelineShaderStageCreateInfo> m_shaderCreateInfos;
1155     std::vector<VkRayTracingShaderGroupCreateInfoKHR> m_shadersGroupCreateInfos;
1156     VkPipelineCreateFlags m_pipelineCreateFlags;
1157     VkPipelineCreateFlags2KHR m_pipelineCreateFlags2;
1158     uint32_t m_maxRecursionDepth;
1159     uint32_t m_maxPayloadSize;
1160     uint32_t m_maxAttributeSize;
1161     bool m_deferredOperation;
1162     uint32_t m_workerThreadCount;
1163     std::vector<VkDynamicState> m_dynamicStates;
1164 };
1165 
1166 class RayTracingProperties
1167 {
1168 protected:
RayTracingProperties()1169     RayTracingProperties()
1170     {
1171     }
1172 
1173 public:
RayTracingProperties(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)1174     RayTracingProperties(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
1175     {
1176         DE_UNREF(vki);
1177         DE_UNREF(physicalDevice);
1178     }
~RayTracingProperties()1179     virtual ~RayTracingProperties()
1180     {
1181     }
1182 
1183     virtual uint32_t getShaderGroupHandleSize(void)                  = 0;
1184     virtual uint32_t getShaderGroupHandleAlignment(void)             = 0;
1185     virtual uint32_t getShaderGroupHandleCaptureReplaySize(void)     = 0;
1186     virtual uint32_t getMaxRecursionDepth(void)                      = 0;
1187     virtual uint32_t getMaxShaderGroupStride(void)                   = 0;
1188     virtual uint32_t getShaderGroupBaseAlignment(void)               = 0;
1189     virtual uint64_t getMaxGeometryCount(void)                       = 0;
1190     virtual uint64_t getMaxInstanceCount(void)                       = 0;
1191     virtual uint64_t getMaxPrimitiveCount(void)                      = 0;
1192     virtual uint32_t getMaxDescriptorSetAccelerationStructures(void) = 0;
1193     virtual uint32_t getMaxRayDispatchInvocationCount(void)          = 0;
1194     virtual uint32_t getMaxRayHitAttributeSize(void)                 = 0;
1195     virtual uint32_t getMaxMemoryAllocationCount(void)               = 0;
1196 };
1197 
1198 de::MovePtr<RayTracingProperties> makeRayTracingProperties(const InstanceInterface &vki,
1199                                                            const VkPhysicalDevice physicalDevice);
1200 
1201 void cmdTraceRays(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1202                   const VkStridedDeviceAddressRegionKHR *raygenShaderBindingTableRegion,
1203                   const VkStridedDeviceAddressRegionKHR *missShaderBindingTableRegion,
1204                   const VkStridedDeviceAddressRegionKHR *hitShaderBindingTableRegion,
1205                   const VkStridedDeviceAddressRegionKHR *callableShaderBindingTableRegion, uint32_t width,
1206                   uint32_t height, uint32_t depth);
1207 
1208 void cmdTraceRaysIndirect(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1209                           const VkStridedDeviceAddressRegionKHR *raygenShaderBindingTableRegion,
1210                           const VkStridedDeviceAddressRegionKHR *missShaderBindingTableRegion,
1211                           const VkStridedDeviceAddressRegionKHR *hitShaderBindingTableRegion,
1212                           const VkStridedDeviceAddressRegionKHR *callableShaderBindingTableRegion,
1213                           VkDeviceAddress indirectDeviceAddress);
1214 
1215 void cmdTraceRaysIndirect2(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1216                            VkDeviceAddress indirectDeviceAddress);
1217 
makeDeviceOrHostAddressConstKHR(const void * hostAddress)1218 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR(const void *hostAddress)
1219 {
1220     // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1221     VkDeviceOrHostAddressConstKHR result;
1222 
1223     deMemset(&result, 0, sizeof(result));
1224 
1225     result.hostAddress = hostAddress;
1226 
1227     return result;
1228 }
1229 
makeDeviceOrHostAddressKHR(void * hostAddress)1230 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR(void *hostAddress)
1231 {
1232     // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1233     VkDeviceOrHostAddressKHR result;
1234 
1235     deMemset(&result, 0, sizeof(result));
1236 
1237     result.hostAddress = hostAddress;
1238 
1239     return result;
1240 }
1241 
makeDeviceOrHostAddressConstKHR(const DeviceInterface & vk,const VkDevice device,VkBuffer buffer,VkDeviceSize offset)1242 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR(const DeviceInterface &vk,
1243                                                                             const VkDevice device, VkBuffer buffer,
1244                                                                             VkDeviceSize offset)
1245 {
1246     // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1247     VkDeviceOrHostAddressConstKHR result;
1248 
1249     deMemset(&result, 0, sizeof(result));
1250 
1251     VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
1252         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, // VkStructureType  sType;
1253         DE_NULL,                                          // const void*  pNext;
1254         buffer,                                           // VkBuffer            buffer
1255     };
1256     result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
1257 
1258     return result;
1259 }
1260 
makeDeviceOrHostAddressKHR(const DeviceInterface & vk,const VkDevice device,VkBuffer buffer,VkDeviceSize offset)1261 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR(const DeviceInterface &vk, const VkDevice device,
1262                                                                   VkBuffer buffer, VkDeviceSize offset)
1263 {
1264     // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1265     VkDeviceOrHostAddressKHR result;
1266 
1267     deMemset(&result, 0, sizeof(result));
1268 
1269     VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
1270         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, // VkStructureType  sType;
1271         DE_NULL,                                          // const void*  pNext;
1272         buffer,                                           // VkBuffer            buffer
1273     };
1274     result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
1275 
1276     return result;
1277 }
1278 
// Pipeline category used by the ray query helpers: compute, graphics or ray
// tracing, plus an INVALID_PIPELINE sentinel as the last enumerator.
// NOTE(review): presumably selects the pipeline type that hosts the ray query
// shader - confirm against the code that consumes RayQueryTestParams.
enum class RayQueryShaderSourcePipeline
{
    COMPUTE,
    GRAPHICS,
    RAYTRACING,
    INVALID_PIPELINE
};
1286 
// Shader stage selector used by the ray query helpers. Covers the graphics
// stages, compute and the ray tracing stages, with INVALID as the last
// enumerator.
// NOTE(review): the difference between RAY_GENERATION_RT and RAY_GENERATION is
// not visible in this header - check the users before relying on either.
enum class RayQueryShaderSourceType
{
    VERTEX,
    TESSELLATION_CONTROL,
    TESSELLATION_EVALUATION,
    GEOMETRY,
    FRAGMENT,
    COMPUTE,
    RAY_GENERATION_RT,
    RAY_GENERATION,
    INTERSECTION,
    ANY_HIT,
    CLOSEST_HIT,
    MISS,
    CALLABLE,
    INVALID
};
1304 
1305 struct Ray
1306 {
Rayvk::Ray1307     Ray() : o(0.0f), tmin(0.0f), d(0.0f), tmax(0.0f)
1308     {
1309     }
Rayvk::Ray1310     Ray(const tcu::Vec3 &io, float imin, const tcu::Vec3 &id, float imax) : o(io), tmin(imin), d(id), tmax(imax)
1311     {
1312     }
1313     tcu::Vec3 o;
1314     float tmin;
1315     tcu::Vec3 d;
1316     float tmax;
1317 };
1318 
1319 struct RayQueryTestParams
1320 {
1321     uint32_t rayFlags;
1322     std::string name;
1323     std::string shaderFunctions;
1324     std::vector<Ray> rays;
1325     std::vector<std::vector<tcu::Vec3>> verts;
1326     std::vector<std::vector<tcu::Vec3>> aabbs;
1327     bool triangles;
1328     RayQueryShaderSourcePipeline pipelineType;
1329     RayQueryShaderSourceType shaderSourceType;
1330     VkTransformMatrixKHR transform;
1331 };
1332 
1333 struct RayQueryTestState
1334 {
RayQueryTestStatevk::RayQueryTestState1335     RayQueryTestState(const vk::DeviceInterface &devInterface, vk::VkDevice dev,
1336                       const vk::InstanceInterface &instInterface, vk::VkPhysicalDevice pDevice,
1337                       uint32_t uQueueFamilyIndex)
1338         : deviceInterface(devInterface)
1339         , device(dev)
1340         , instanceInterface(instInterface)
1341         , physDevice(pDevice)
1342         , allocator(new SimpleAllocator(deviceInterface, device,
1343                                         getPhysicalDeviceMemoryProperties(instanceInterface, physDevice)))
1344         , cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1345                                     uQueueFamilyIndex))
1346     {
1347         pipelineBind = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
1348     }
1349 
1350     const vk::DeviceInterface &deviceInterface;
1351     vk::VkDevice device;
1352     const vk::InstanceInterface &instanceInterface;
1353     vk::VkPhysicalDevice physDevice;
1354     const de::UniquePtr<vk::Allocator> allocator;
1355     const Unique<VkCommandPool> cmdPool;
1356     VkPipelineBindPoint pipelineBind;
1357 };
1358 
registerRayQueryShaderModule(const DeviceInterface & vkd,const VkDevice device,vk::BinaryCollection & binaryCollection,std::vector<de::SharedPtr<Move<VkShaderModule>>> & shaderModules,std::vector<VkPipelineShaderStageCreateInfo> & shaderCreateInfos,VkShaderStageFlagBits stage,const std::string & name)1359 static inline bool registerRayQueryShaderModule(const DeviceInterface &vkd, const VkDevice device,
1360                                                 vk::BinaryCollection &binaryCollection,
1361                                                 std::vector<de::SharedPtr<Move<VkShaderModule>>> &shaderModules,
1362                                                 std::vector<VkPipelineShaderStageCreateInfo> &shaderCreateInfos,
1363                                                 VkShaderStageFlagBits stage, const std::string &name)
1364 {
1365     if (name.size() == 0)
1366         return false;
1367 
1368     shaderModules.push_back(de::SharedPtr<Move<VkShaderModule>>(
1369         new Move<VkShaderModule>(createShaderModule(vkd, device, binaryCollection.get(name), 0))));
1370 
1371     shaderCreateInfos.push_back({
1372         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1373         stage,                       // stage
1374         shaderModules.back()->get(), // shader
1375         "main",
1376         DE_NULL, // pSpecializationInfo
1377     });
1378 
1379     return true;
1380 }
1381 
initRayQueryAccelerationStructures(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,RayQueryTestParams testParams,VkCommandBuffer cmdBuffer,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomAccelerationStructures,de::SharedPtr<vk::TopLevelAccelerationStructure> & topAccelerationStructure)1382 static inline void initRayQueryAccelerationStructures(
1383     const vk::DeviceInterface &vkd, const vk::VkDevice &device, vk::Allocator &allocator, RayQueryTestParams testParams,
1384     VkCommandBuffer cmdBuffer,
1385     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomAccelerationStructures,
1386     de::SharedPtr<vk::TopLevelAccelerationStructure> &topAccelerationStructure)
1387 {
1388     uint32_t instanceCount = static_cast<uint32_t>(testParams.verts.size());
1389 
1390     const uint32_t instancesGroupCount = instanceCount;
1391     de::MovePtr<vk::TopLevelAccelerationStructure> rayQueryTopLevelAccelerationStructure =
1392         makeTopLevelAccelerationStructure();
1393 
1394     topAccelerationStructure =
1395         de::SharedPtr<vk::TopLevelAccelerationStructure>(rayQueryTopLevelAccelerationStructure.release());
1396     topAccelerationStructure->setInstanceCount(instancesGroupCount);
1397 
1398     for (size_t instanceNdx = 0; instanceNdx < instancesGroupCount; ++instanceNdx)
1399     {
1400         de::MovePtr<BottomLevelAccelerationStructure> rayQueryBottomLevelAccelerationStructure =
1401             makeBottomLevelAccelerationStructure();
1402 
1403         bool triangles         = testParams.verts[instanceNdx].size() > 0;
1404         uint32_t geometryCount = (triangles) ? static_cast<uint32_t>(testParams.verts[instanceNdx].size()) / 3 :
1405                                                static_cast<uint32_t>(testParams.aabbs[instanceNdx].size()) / 2;
1406         std::vector<tcu::Vec3> geometryData;
1407 
1408         for (size_t geometryNdx = 0; geometryNdx < geometryCount; ++geometryNdx)
1409         {
1410             if (triangles)
1411             {
1412                 tcu::Vec3 v0 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 0].x(),
1413                                          testParams.verts[instanceNdx][geometryNdx * 3 + 0].y(),
1414                                          testParams.verts[instanceNdx][geometryNdx * 3 + 0].z());
1415                 tcu::Vec3 v1 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 1].x(),
1416                                          testParams.verts[instanceNdx][geometryNdx * 3 + 1].y(),
1417                                          testParams.verts[instanceNdx][geometryNdx * 3 + 1].z());
1418                 tcu::Vec3 v2 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 2].x(),
1419                                          testParams.verts[instanceNdx][geometryNdx * 3 + 2].y(),
1420                                          testParams.verts[instanceNdx][geometryNdx * 3 + 2].z());
1421 
1422                 geometryData.push_back(v0);
1423                 geometryData.push_back(v1);
1424                 geometryData.push_back(v2);
1425             }
1426             else
1427             {
1428                 tcu::Vec3 v0 = tcu::Vec3(testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].x(),
1429                                          testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].y(),
1430                                          testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].z());
1431                 tcu::Vec3 v1 = tcu::Vec3(testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].x(),
1432                                          testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].y(),
1433                                          testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].z());
1434 
1435                 geometryData.push_back(v0);
1436                 geometryData.push_back(v1);
1437             }
1438         }
1439 
1440         rayQueryBottomLevelAccelerationStructure->addGeometry(geometryData, triangles);
1441         rayQueryBottomLevelAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
1442 
1443         bottomAccelerationStructures.push_back(
1444             de::SharedPtr<BottomLevelAccelerationStructure>(rayQueryBottomLevelAccelerationStructure.release()));
1445 
1446         topAccelerationStructure->addInstance(bottomAccelerationStructures.back());
1447     }
1448 
1449     topAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
1450 }
1451 
1452 template <typename T>
rayQueryRayTracingTestSetup(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,uint32_t universalQueueFamilyIndex,const RayQueryTestParams params)1453 std::vector<T> rayQueryRayTracingTestSetup(const vk::DeviceInterface &vkd, const vk::VkDevice &device,
1454                                            vk::Allocator &allocator, const vk::InstanceInterface &instanceInterface,
1455                                            vk::VkPhysicalDevice physDevice, vk::BinaryCollection &binaryCollection,
1456                                            vk::VkQueue universalQueue, uint32_t universalQueueFamilyIndex,
1457                                            const RayQueryTestParams params)
1458 {
1459     RayQueryTestState state(vkd, device, instanceInterface, physDevice, universalQueueFamilyIndex);
1460 
1461     vk::Move<VkDescriptorPool> descriptorPool;
1462     vk::Move<VkDescriptorSetLayout> descriptorSetLayout;
1463     vk::Move<VkDescriptorSet> descriptorSet;
1464     vk::Move<VkPipelineLayout> pipelineLayout;
1465     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> rayQueryBottomAccelerationStructures;
1466     de::SharedPtr<TopLevelAccelerationStructure> rayQueryTopAccelerationStructure;
1467     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> traceBottomAccelerationStructures;
1468     de::MovePtr<TopLevelAccelerationStructure> traceAccelerationStructure;
1469 
1470     de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR = makeRayTracingProperties(instanceInterface, physDevice);
1471     uint32_t shaderGroupHandleSize                            = rayTracingPropertiesKHR->getShaderGroupHandleSize();
1472     uint32_t shaderGroupBaseAlignment                         = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
1473 
1474     const VkBufferCreateInfo resultDataCreateInfo =
1475         makeBufferCreateInfo(params.rays.size() * sizeof(T), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1476     de::MovePtr<BufferWithMemory> resultData = de::MovePtr<BufferWithMemory>(
1477         new BufferWithMemory(vkd, device, allocator, resultDataCreateInfo, MemoryRequirement::HostVisible));
1478 
1479     const uint32_t AllStages = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
1480                                VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
1481                                VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
1482 
1483     descriptorSetLayout =
1484         DescriptorSetLayoutBuilder()
1485             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, AllStages)
1486             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, AllStages)
1487             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, AllStages)
1488             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, vk::VK_SHADER_STAGE_RAYGEN_BIT_KHR)
1489             .build(vkd, device);
1490     descriptorPool = DescriptorPoolBuilder()
1491                          .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1492                          .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1493                          .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1494                          .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1495                          .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1496     descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
1497 
1498     pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
1499 
1500     const std::map<RayQueryShaderSourceType, std::vector<std::string>> shaderNames = {
1501         {RayQueryShaderSourceType::RAY_GENERATION_RT, {"rgen", "isect_rt", "ahit_rt", "chit_rt", "miss_rt", ""}},
1502         {RayQueryShaderSourceType::RAY_GENERATION, {"rgen", "", "", "", "", ""}},
1503         {RayQueryShaderSourceType::INTERSECTION, {"rgen", "isect_1", "", "chit", "miss", ""}},
1504         {RayQueryShaderSourceType::ANY_HIT, {"rgen", "isect", "ahit", "", "miss", ""}},
1505         {RayQueryShaderSourceType::CLOSEST_HIT, {"rgen", "isect", "", "chit", "miss", ""}},
1506         {RayQueryShaderSourceType::MISS, {"rgen", "isect", "", "chit", "miss_1", ""}},
1507         {RayQueryShaderSourceType::CALLABLE, {"rgen", "", "", "chit", "miss", "call"}}};
1508 
1509     auto shaderNameIt = shaderNames.find(params.shaderSourceType);
1510     if (shaderNameIt == end(shaderNames))
1511         TCU_THROW(InternalError, "Wrong shader source type");
1512 
1513     std::vector<VkPipelineShaderStageCreateInfo> shaderCreateInfos;
1514     std::vector<de::SharedPtr<Move<VkShaderModule>>> shaderModules;
1515     bool rgen, isect, ahit, chit, miss, call;
1516 
1517     rgen  = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1518                                          VK_SHADER_STAGE_RAYGEN_BIT_KHR, shaderNameIt->second[0]);
1519     isect = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1520                                          VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderNameIt->second[1]);
1521     ahit  = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1522                                          VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderNameIt->second[2]);
1523     chit  = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1524                                          VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderNameIt->second[3]);
1525     miss  = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1526                                          VK_SHADER_STAGE_MISS_BIT_KHR, shaderNameIt->second[4]);
1527     call  = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1528                                          VK_SHADER_STAGE_CALLABLE_BIT_KHR, shaderNameIt->second[5]);
1529 
1530     bool rgenRTTest = rgen && chit && ahit && miss && isect;
1531     bool isectTest  = rgen && isect && chit && miss && (shaderNameIt->second[1] == "isect_1");
1532     bool ahitTest   = rgen && ahit;
1533     bool chitTest =
1534         rgen && isect && chit && miss && (shaderNameIt->second[4] == "miss") && (shaderNameIt->second[1] == "isect");
1535     bool missTest = rgen && isect && chit && miss && (shaderNameIt->second[4] == "miss_1");
1536     bool callTest = rgen && chit && miss && call;
1537 
1538     de::MovePtr<RayTracingPipeline> rt_pipeline = de::newMovePtr<RayTracingPipeline>();
1539 
1540     int raygenGroup   = 0;
1541     int hitGroup      = -1;
1542     int missGroup     = -1;
1543     int callableGroup = -1;
1544 
1545     rt_pipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, shaderModules[0].get()->get(), raygenGroup);
1546 
1547     if (rgenRTTest)
1548     {
1549         hitGroup  = 1;
1550         missGroup = 2;
1551         rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1552         rt_pipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1553         rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[3].get()->get(), hitGroup);
1554         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[4].get()->get(), missGroup);
1555     }
1556     else if (ahitTest)
1557     {
1558         hitGroup  = 1;
1559         missGroup = 2;
1560         rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1561         rt_pipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1562         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1563     }
1564     else if (missTest)
1565     {
1566         hitGroup  = 1;
1567         missGroup = 2;
1568         rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1569         rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1570         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1571     }
1572     else if (chitTest)
1573     {
1574         hitGroup  = 1;
1575         missGroup = 2;
1576         rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1577         rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1578         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1579     }
1580     else if (isectTest)
1581     {
1582         hitGroup  = 1;
1583         missGroup = 2;
1584         rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1585         rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1586         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1587     }
1588     else if (callTest)
1589     {
1590         hitGroup      = 1;
1591         missGroup     = 2;
1592         callableGroup = 3;
1593         rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1594         rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[2].get()->get(), missGroup);
1595         rt_pipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, shaderModules[3].get()->get(), callableGroup);
1596     }
1597 
1598     Move<VkPipeline> pipeline = rt_pipeline->createPipeline(vkd, device, *pipelineLayout);
1599 
1600     de::MovePtr<BufferWithMemory> raygenShaderBindingTable = rt_pipeline->createShaderBindingTable(
1601         vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenGroup, 1u);
1602     de::MovePtr<BufferWithMemory> missShaderBindingTable =
1603         missGroup > 0 ?
1604             rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1605                                                   shaderGroupBaseAlignment, missGroup, 1u) :
1606             de::MovePtr<BufferWithMemory>();
1607     de::MovePtr<BufferWithMemory> hitShaderBindingTable =
1608         hitGroup > 0 ?
1609             rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1610                                                   shaderGroupBaseAlignment, hitGroup, 1u) :
1611             de::MovePtr<BufferWithMemory>();
1612     de::MovePtr<BufferWithMemory> callableShaderBindingTable =
1613         callableGroup > 0 ?
1614             rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1615                                                   shaderGroupBaseAlignment, callableGroup, 1u) :
1616             de::MovePtr<BufferWithMemory>();
1617 
1618     VkStridedDeviceAddressRegionKHR raygenRegion =
1619         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*raygenShaderBindingTable).get(), 0),
1620                                           shaderGroupHandleSize, shaderGroupHandleSize);
1621     VkStridedDeviceAddressRegionKHR missRegion =
1622         missGroup > 0 ?
1623             makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*missShaderBindingTable).get(), 0),
1624                                               shaderGroupHandleSize, shaderGroupHandleSize) :
1625             VkStridedDeviceAddressRegionKHR{0, 0, 0};
1626     VkStridedDeviceAddressRegionKHR hitRegion =
1627         hitGroup > 0 ?
1628             makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*hitShaderBindingTable).get(), 0),
1629                                               shaderGroupHandleSize, shaderGroupHandleSize) :
1630             VkStridedDeviceAddressRegionKHR{0, 0, 0};
1631     VkStridedDeviceAddressRegionKHR callableRegion =
1632         callableGroup > 0 ? makeStridedDeviceAddressRegionKHR(
1633                                 getBufferDeviceAddress(vkd, device, (*callableShaderBindingTable).get(), 0),
1634                                 shaderGroupHandleSize, shaderGroupHandleSize) :
1635                             VkStridedDeviceAddressRegionKHR{0, 0, 0};
1636 
1637     const Unique<VkCommandBuffer> cmdBuffer(
1638         allocateCommandBuffer(vkd, device, *state.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1639 
1640     de::MovePtr<BufferWithMemory> rayBuffer;
1641 
1642     if (params.rays.empty() == false)
1643     {
1644         const VkBufferCreateInfo rayBufferCreateInfo =
1645             makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1646         rayBuffer = de::MovePtr<BufferWithMemory>(
1647             new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
1648 
1649         memcpy(rayBuffer->getAllocation().getHostPtr(), &params.rays[0], params.rays.size() * sizeof(Ray));
1650         flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
1651                                rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
1652     }
1653 
1654     beginCommandBuffer(vkd, *cmdBuffer);
1655 
1656     // build acceleration structures for ray query
1657     initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer, rayQueryBottomAccelerationStructures,
1658                                        rayQueryTopAccelerationStructure);
1659     // build acceleration structures for trace
1660     std::vector<tcu::Vec3> geomData;
1661     switch (params.shaderSourceType)
1662     {
1663     case RayQueryShaderSourceType::MISS:
1664         geomData.push_back(tcu::Vec3(0, 0, -1));
1665         geomData.push_back(tcu::Vec3(1, 0, -1));
1666         geomData.push_back(tcu::Vec3(0, 1, -1));
1667         break;
1668     case RayQueryShaderSourceType::CLOSEST_HIT:
1669     case RayQueryShaderSourceType::CALLABLE:
1670         geomData.push_back(tcu::Vec3(0, 0, 1));
1671         geomData.push_back(tcu::Vec3(1, 0, 1));
1672         geomData.push_back(tcu::Vec3(0, 1, 1));
1673         break;
1674     case RayQueryShaderSourceType::ANY_HIT:
1675     case RayQueryShaderSourceType::INTERSECTION:
1676         geomData.push_back(tcu::Vec3(0, 0, 1));
1677         geomData.push_back(tcu::Vec3(0.5, 0.5, 1));
1678         break;
1679     default:
1680         break;
1681     }
1682 
1683     VkDescriptorBufferInfo resultBufferDesc = {(*resultData).get(), 0, VK_WHOLE_SIZE};
1684     VkDescriptorBufferInfo rayBufferDesc    = {(*rayBuffer).get(), 0, VK_WHOLE_SIZE};
1685 
1686     const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr =
1687         rayQueryTopAccelerationStructure.get();
1688     VkWriteDescriptorSetAccelerationStructureKHR rayQueryAccelerationStructureWriteDescriptorSet = {
1689         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
1690         DE_NULL,                                                           //  const void* pNext;
1691         1u,                                                                //  uint32_t accelerationStructureCount;
1692         rayQueryTopLevelAccelerationStructurePtr
1693             ->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
1694     };
1695 
1696     VkWriteDescriptorSetAccelerationStructureKHR traceAccelerationStructureWriteDescriptorSet = {};
1697     if (geomData.size() > 0)
1698     {
1699         traceAccelerationStructure = makeTopLevelAccelerationStructure();
1700         traceAccelerationStructure->setInstanceCount(1);
1701 
1702         de::MovePtr<BottomLevelAccelerationStructure> traceBottomLevelAccelerationStructure =
1703             makeBottomLevelAccelerationStructure();
1704 
1705         traceBottomLevelAccelerationStructure->addGeometry(geomData, ((geomData.size() % 3) == 0), 0);
1706         traceBottomLevelAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
1707         traceBottomAccelerationStructures.push_back(
1708             de::SharedPtr<BottomLevelAccelerationStructure>(traceBottomLevelAccelerationStructure.release()));
1709         traceAccelerationStructure->addInstance(traceBottomAccelerationStructures.back(), identityMatrix3x4, 0, 255U, 0,
1710                                                 0);
1711         traceAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
1712 
1713         const TopLevelAccelerationStructure *traceTopLevelAccelerationStructurePtr = traceAccelerationStructure.get();
1714         traceAccelerationStructureWriteDescriptorSet                               = {
1715             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
1716             DE_NULL, //  const void* pNext;
1717             1u,      //  uint32_t accelerationStructureCount;
1718             traceTopLevelAccelerationStructurePtr
1719                 ->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
1720         };
1721 
1722         DescriptorSetUpdateBuilder()
1723             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1724                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1725             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1726                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1727                          &rayQueryAccelerationStructureWriteDescriptorSet)
1728             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1729                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1730             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(3u),
1731                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &traceAccelerationStructureWriteDescriptorSet)
1732             .update(vkd, device);
1733     }
1734     else
1735     {
1736         DescriptorSetUpdateBuilder()
1737             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1738                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1739             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1740                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1741                          &rayQueryAccelerationStructureWriteDescriptorSet)
1742             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1743                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1744             .update(vkd, device);
1745     }
1746 
1747     VkDescriptorSet setHandle = descriptorSet.get();
1748 
1749     vkd.cmdBindPipeline(*cmdBuffer, state.pipelineBind, *pipeline);
1750     vkd.cmdBindDescriptorSets(*cmdBuffer, state.pipelineBind, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
1751 
1752     cmdTraceRays(vkd, *cmdBuffer, &raygenRegion, &missRegion, &hitRegion, &callableRegion,
1753                  static_cast<uint32_t>(params.rays.size()), 1, 1);
1754 
1755     endCommandBuffer(vkd, *cmdBuffer);
1756 
1757     submitCommandsAndWait(vkd, device, universalQueue, *cmdBuffer);
1758 
1759     invalidateMappedMemoryRange(vkd, device, resultData->getAllocation().getMemory(),
1760                                 resultData->getAllocation().getOffset(), VK_WHOLE_SIZE);
1761 
1762     std::vector<T> results(params.rays.size());
1763     memcpy(&results[0], resultData->getAllocation().getHostPtr(), sizeof(T) * params.rays.size());
1764 
1765     rayQueryBottomAccelerationStructures.clear();
1766     rayQueryTopAccelerationStructure.clear();
1767     traceBottomAccelerationStructures.clear();
1768     traceAccelerationStructure.clear();
1769 
1770     return results;
1771 }
1772 
1773 template <typename T>
rayQueryComputeTestSetup(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,uint32_t universalQueueFamilyIndex,RayQueryTestParams params)1774 std::vector<T> rayQueryComputeTestSetup(const vk::DeviceInterface &vkd, const vk::VkDevice &device,
1775                                         vk::Allocator &allocator, const vk::InstanceInterface &instanceInterface,
1776                                         vk::VkPhysicalDevice physDevice, vk::BinaryCollection &binaryCollection,
1777                                         vk::VkQueue universalQueue, uint32_t universalQueueFamilyIndex,
1778                                         RayQueryTestParams params)
1779 {
1780     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomAccelerationStructures;
1781     de::SharedPtr<TopLevelAccelerationStructure> topAccelerationStructure;
1782 
1783     RayQueryTestState state(vkd, device, instanceInterface, physDevice, universalQueueFamilyIndex);
1784 
1785     const DeviceInterface &vk = vkd;
1786 
1787     int power    = static_cast<int>(ceil(log2(params.rays.size())));
1788     power        = (power % 2 == 0) ? power : power + 1;
1789     const int sz = de::max<int>(static_cast<int>(pow(2, power)), 64);
1790     Ray ray      = Ray();
1791 
1792     for (int idx = static_cast<int>(params.rays.size()); idx < sz; ++idx)
1793     {
1794         params.rays.push_back(ray);
1795     }
1796 
1797     const VkBufferCreateInfo resultDataCreateInfo =
1798         makeBufferCreateInfo(params.rays.size() * sizeof(T), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1799     de::MovePtr<BufferWithMemory> resultData = de::MovePtr<BufferWithMemory>(
1800         new BufferWithMemory(vkd, device, allocator, resultDataCreateInfo, MemoryRequirement::HostVisible));
1801 
1802     const Move<VkDescriptorSetLayout> descriptorSetLayout =
1803         DescriptorSetLayoutBuilder()
1804             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1805             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_COMPUTE_BIT)
1806             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1807             .build(vk, device);
1808     const Move<VkDescriptorPool> descriptorPool =
1809         DescriptorPoolBuilder()
1810             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1811             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1812             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1813             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1814     const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
1815     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, descriptorSetLayout.get());
1816 
1817     const Unique<VkShaderModule> rayQueryModule(createShaderModule(vkd, device, binaryCollection.get("comp"), 0u));
1818 
1819     const VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
1820         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1821         DE_NULL,                                             // const void* pNext;
1822         static_cast<VkPipelineShaderStageCreateFlags>(0u),   // VkPipelineShaderStageCreateFlags flags;
1823         VK_SHADER_STAGE_COMPUTE_BIT,                         // VkShaderStageFlagBits stage;
1824         *rayQueryModule,                                     // VkShaderModule module;
1825         "main",                                              // const char* pName;
1826         DE_NULL,                                             // const VkSpecializationInfo* pSpecializationInfo;
1827     };
1828     const VkComputePipelineCreateInfo pipelineCreateInfo = {
1829         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
1830         DE_NULL,                                        // const void* pNext;
1831         static_cast<VkPipelineCreateFlags>(0u),         // VkPipelineCreateFlags flags;
1832         pipelineShaderStageParams,                      // VkPipelineShaderStageCreateInfo stage;
1833         *pipelineLayout,                                // VkPipelineLayout layout;
1834         DE_NULL,                                        // VkPipeline basePipelineHandle;
1835         0,                                              // int32_t basePipelineIndex;
1836     };
1837     Move<VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
1838 
1839     const Unique<VkCommandBuffer> cmdBuffer(
1840         allocateCommandBuffer(vk, device, *state.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1841 
1842     de::MovePtr<BufferWithMemory> rayBuffer;
1843 
1844     if (params.rays.empty() == false)
1845     {
1846         const VkBufferCreateInfo rayBufferCreateInfo =
1847             makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1848         rayBuffer = de::MovePtr<BufferWithMemory>(
1849             new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
1850 
1851         memcpy(rayBuffer->getAllocation().getHostPtr(), &params.rays[0], params.rays.size() * sizeof(Ray));
1852         flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
1853                                rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
1854     }
1855 
1856     beginCommandBuffer(vk, *cmdBuffer);
1857 
1858     // build acceleration structures for ray query
1859     initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer, bottomAccelerationStructures,
1860                                        topAccelerationStructure);
1861 
1862     const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr = topAccelerationStructure.get();
1863     VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
1864         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
1865         DE_NULL,                                                           //  const void* pNext;
1866         1u,                                                                //  uint32_t accelerationStructureCount;
1867         rayQueryTopLevelAccelerationStructurePtr
1868             ->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
1869     };
1870 
1871     VkDescriptorBufferInfo resultBufferDesc = {(*resultData).get(), 0, VK_WHOLE_SIZE};
1872     VkDescriptorBufferInfo rayBufferDesc    = {(*rayBuffer).get(), 0, VK_WHOLE_SIZE};
1873 
1874     DescriptorSetUpdateBuilder()
1875         .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1876                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1877         .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1878                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
1879         .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1880                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1881         .update(vk, device);
1882 
1883     VkDescriptorSet setHandle = descriptorSet.get();
1884 
1885     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1886 
1887     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
1888 
1889     vk.cmdDispatch(*cmdBuffer, static_cast<uint32_t>(params.rays.size()), 1, 1);
1890 
1891     endCommandBuffer(vk, *cmdBuffer);
1892 
1893     submitCommandsAndWait(vk, device, universalQueue, *cmdBuffer);
1894 
1895     invalidateMappedMemoryRange(vk, device, resultData->getAllocation().getMemory(),
1896                                 resultData->getAllocation().getOffset(), VK_WHOLE_SIZE);
1897 
1898     std::vector<T> results(params.rays.size());
1899 
1900     memcpy(&results[0], resultData->getAllocation().getHostPtr(), sizeof(T) * params.rays.size());
1901 
1902     topAccelerationStructure.clear();
1903     bottomAccelerationStructures.clear();
1904 
1905     return results;
1906 }
1907 
1908 template <typename T>
rayQueryGraphicsTestSetup(const DeviceInterface & vkd,const VkDevice device,const uint32_t queueFamilyIndex,Allocator & allocator,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,RayQueryTestParams params)1909 static std::vector<T> rayQueryGraphicsTestSetup(const DeviceInterface &vkd, const VkDevice device,
1910                                                 const uint32_t queueFamilyIndex, Allocator &allocator,
1911                                                 vk::BinaryCollection &binaryCollection, vk::VkQueue universalQueue,
1912                                                 const vk::InstanceInterface &instanceInterface,
1913                                                 vk::VkPhysicalDevice physDevice, RayQueryTestParams params)
1914 {
1915     int width = static_cast<int>(params.rays.size());
1916     int power = static_cast<int>(ceil(log2(width)));
1917     power     = (power % 2 == 0) ? power : power + 1;
1918     int sz    = static_cast<int>(pow(2, power / 2));
1919 
1920     Ray ray           = Ray();
1921     const int totalSz = sz * sz;
1922 
1923     for (int idx = static_cast<int>(params.rays.size()); idx < totalSz; ++idx)
1924     {
1925         params.rays.push_back(ray);
1926     }
1927 
1928     const tcu::UVec2 renderSz = {static_cast<uint32_t>(sz), static_cast<uint32_t>(sz)};
1929 
1930     Move<VkDescriptorSetLayout> descriptorSetLayout;
1931     Move<VkDescriptorPool> descriptorPool;
1932     Move<VkDescriptorSet> descriptorSet;
1933     Move<VkPipelineLayout> pipelineLayout;
1934     Move<VkRenderPass> renderPass;
1935     Move<VkFramebuffer> framebuffer;
1936     Move<VkPipeline> pipeline;
1937     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> rayQueryBottomAccelerationStructures;
1938     de::SharedPtr<TopLevelAccelerationStructure> rayQueryTopAccelerationStructure;
1939 
1940     descriptorSetLayout =
1941         DescriptorSetLayoutBuilder()
1942             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_ALL_GRAPHICS)
1943             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_ALL_GRAPHICS)
1944             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL_GRAPHICS)
1945             .build(vkd, device);
1946     descriptorPool = DescriptorPoolBuilder()
1947                          .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1948                          .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1949                          .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1950                          .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1951     descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
1952     pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
1953 
1954     const std::map<RayQueryShaderSourceType, std::vector<std::string>> shaderNames = {
1955         //idx: 0                1                2                3                4
1956         //shader: vert, tesc, tese, geom, frag,
1957         {RayQueryShaderSourceType::VERTEX,
1958          {
1959              "vert",
1960              "",
1961              "",
1962              "",
1963              "",
1964          }},
1965         {RayQueryShaderSourceType::TESSELLATION_CONTROL,
1966          {
1967              "vert",
1968              "tesc",
1969              "tese",
1970              "",
1971              "",
1972          }},
1973         {RayQueryShaderSourceType::TESSELLATION_EVALUATION,
1974          {
1975              "vert",
1976              "tesc",
1977              "tese",
1978              "",
1979              "",
1980          }},
1981         {RayQueryShaderSourceType::GEOMETRY,
1982          {
1983              "vert",
1984              "",
1985              "",
1986              "geom",
1987              "",
1988          }},
1989         {RayQueryShaderSourceType::FRAGMENT,
1990          {
1991              "vert",
1992              "",
1993              "",
1994              "",
1995              "frag",
1996          }},
1997     };
1998 
1999     auto shaderNameIt = shaderNames.find(params.shaderSourceType);
2000     if (shaderNameIt == end(shaderNames))
2001         TCU_THROW(InternalError, "Wrong shader source type");
2002 
2003     std::vector<VkPipelineShaderStageCreateInfo> shaderCreateInfos;
2004     std::vector<de::SharedPtr<Move<VkShaderModule>>> shaderModules;
2005     bool tescX, teseX, fragX;
2006     registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2007                                  VK_SHADER_STAGE_VERTEX_BIT, shaderNameIt->second[0]);
2008     tescX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2009                                          VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, shaderNameIt->second[1]);
2010     teseX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2011                                          VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, shaderNameIt->second[2]);
2012     registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2013                                  VK_SHADER_STAGE_GEOMETRY_BIT, shaderNameIt->second[3]);
2014     fragX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2015                                          VK_SHADER_STAGE_FRAGMENT_BIT, shaderNameIt->second[4]);
2016 
2017     const vk::VkSubpassDescription subpassDesc = {
2018         (vk::VkSubpassDescriptionFlags)0,
2019         vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
2020         0u,                                  // inputCount
2021         DE_NULL,                             // pInputAttachments
2022         0u,                                  // colorCount
2023         DE_NULL,                             // pColorAttachments
2024         DE_NULL,                             // pResolveAttachments
2025         DE_NULL,                             // depthStencilAttachment
2026         0u,                                  // preserveCount
2027         DE_NULL,                             // pPreserveAttachments
2028     };
2029     const vk::VkRenderPassCreateInfo renderPassParams = {
2030         vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
2031         DE_NULL,                                       // pNext
2032         (vk::VkRenderPassCreateFlags)0,
2033         0u,           // attachmentCount
2034         DE_NULL,      // pAttachments
2035         1u,           // subpassCount
2036         &subpassDesc, // pSubpasses
2037         0u,           // dependencyCount
2038         DE_NULL,      // pDependencies
2039     };
2040 
2041     renderPass = createRenderPass(vkd, device, &renderPassParams);
2042 
2043     const vk::VkFramebufferCreateInfo framebufferParams = {
2044         vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
2045         DE_NULL,                                       // pNext
2046         (vk::VkFramebufferCreateFlags)0,
2047         *renderPass, // renderPass
2048         0u,          // attachmentCount
2049         DE_NULL,     // pAttachments
2050         renderSz[0], // width
2051         renderSz[1], // height
2052         1u,          // layers
2053     };
2054 
2055     framebuffer = createFramebuffer(vkd, device, &framebufferParams);
2056 
2057     VkPrimitiveTopology testTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
2058     std::vector<tcu::Vec3> vertices;
2059 
2060     switch (params.shaderSourceType)
2061     {
2062     case RayQueryShaderSourceType::TESSELLATION_CONTROL:
2063     case RayQueryShaderSourceType::TESSELLATION_EVALUATION:
2064     case RayQueryShaderSourceType::VERTEX:
2065     case RayQueryShaderSourceType::GEOMETRY:
2066     {
2067         if ((params.shaderSourceType == RayQueryShaderSourceType::VERTEX) ||
2068             (params.shaderSourceType == RayQueryShaderSourceType::GEOMETRY))
2069         {
2070             testTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
2071         }
2072         else
2073         {
2074             testTopology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
2075         }
2076         const int numTriangles = static_cast<int>(params.rays.size());
2077         const float halfStepSz = 1.f / (static_cast<float>(numTriangles) * 2.f);
2078         float startX           = 0.0;
2079         for (int index = 0; index < numTriangles; ++index)
2080         {
2081             vertices.push_back(tcu::Vec3(startX, 0.0, static_cast<float>(index)));
2082             startX += halfStepSz;
2083             vertices.push_back(tcu::Vec3(startX, 1.0, static_cast<float>(index)));
2084             startX += halfStepSz;
2085             vertices.push_back(tcu::Vec3(startX, 0.0, static_cast<float>(index)));
2086         }
2087         break;
2088     }
2089     case RayQueryShaderSourceType::FRAGMENT:
2090         vertices.push_back(tcu::Vec3(-1.0f, -1.0f, 0.0f));
2091         vertices.push_back(tcu::Vec3(1.0f, -1.0f, 0.0f));
2092         vertices.push_back(tcu::Vec3(-1.0f, 1.0f, 0.0f));
2093         vertices.push_back(tcu::Vec3(1.0f, 1.0f, 0.0f));
2094         break;
2095     default:
2096         TCU_THROW(InternalError, "Wrong shader source type");
2097     };
2098 
2099     const VkVertexInputBindingDescription vertexInputBindingDescription = {
2100         0u,                          // uint32_t binding;
2101         sizeof(tcu::Vec3),           // uint32_t stride;
2102         VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate;
2103     };
2104 
2105     const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
2106         0u,                         // uint32_t location;
2107         0u,                         // uint32_t binding;
2108         VK_FORMAT_R32G32B32_SFLOAT, // VkFormat format;
2109         0u,                         // uint32_t offset;
2110     };
2111 
2112     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
2113         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
2114         DE_NULL,                                                   // const void* pNext;
2115         (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
2116         1u,                                                        // uint32_t vertexBindingDescriptionCount;
2117         &vertexInputBindingDescription,  // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
2118         1u,                              // uint32_t vertexAttributeDescriptionCount;
2119         &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
2120     };
2121 
2122     const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
2123         VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
2124         DE_NULL,                                                     // const void* pNext;
2125         (VkPipelineInputAssemblyStateCreateFlags)0,                  // VkPipelineInputAssemblyStateCreateFlags flags;
2126         testTopology,                                                // VkPrimitiveTopology topology;
2127         VK_FALSE                                                     // VkBool32 primitiveRestartEnable;
2128     };
2129 
2130     const VkPipelineTessellationStateCreateInfo tessellationStateCreateInfo = {
2131         VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType;
2132         DE_NULL,                                                   // const void* pNext;
2133         VkPipelineTessellationStateCreateFlags(0u),                // VkPipelineTessellationStateCreateFlags flags;
2134         3u                                                         // uint32_t patchControlPoints;
2135     };
2136 
2137     VkViewport viewport = makeViewport(renderSz[0], renderSz[1]);
2138     VkRect2D scissor    = makeRect2D(renderSz[0], renderSz[1]);
2139 
2140     const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
2141         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                                    sType
2142         DE_NULL,                               // const void*                                        pNext
2143         (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags                flags
2144         1u,                                    // uint32_t                                            viewportCount
2145         &viewport,                             // const VkViewport*                                pViewports
2146         1u,                                    // uint32_t                                            scissorCount
2147         &scissor                               // const VkRect2D*                                    pScissors
2148     };
2149 
2150     const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
2151         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
2152         DE_NULL,                                                    // const void* pNext;
2153         (VkPipelineRasterizationStateCreateFlags)0,                 // VkPipelineRasterizationStateCreateFlags flags;
2154         VK_FALSE,                                                   // VkBool32 depthClampEnable;
2155         fragX ? VK_FALSE : VK_TRUE,                                 // VkBool32 rasterizerDiscardEnable;
2156         VK_POLYGON_MODE_FILL,                                       // VkPolygonMode polygonMode;
2157         VK_CULL_MODE_NONE,                                          // VkCullModeFlags cullMode;
2158         VK_FRONT_FACE_CLOCKWISE,                                    // VkFrontFace frontFace;
2159         VK_FALSE,                                                   // VkBool32 depthBiasEnable;
2160         0.0f,                                                       // float depthBiasConstantFactor;
2161         0.0f,                                                       // float depthBiasClamp;
2162         0.0f,                                                       // float depthBiasSlopeFactor;
2163         1.0f                                                        // float lineWidth;
2164     };
2165 
2166     const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
2167         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType;
2168         DE_NULL,                                                  // const void* pNext;
2169         (VkPipelineMultisampleStateCreateFlags)0,                 // VkPipelineMultisampleStateCreateFlags flags;
2170         VK_SAMPLE_COUNT_1_BIT,                                    // VkSampleCountFlagBits rasterizationSamples;
2171         VK_FALSE,                                                 // VkBool32 sampleShadingEnable;
2172         0.0f,                                                     // float minSampleShading;
2173         DE_NULL,                                                  // const VkSampleMask* pSampleMask;
2174         VK_FALSE,                                                 // VkBool32 alphaToCoverageEnable;
2175         VK_FALSE                                                  // VkBool32 alphaToOneEnable;
2176     };
2177 
2178     const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
2179         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
2180         DE_NULL,                                                  // const void* pNext;
2181         (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
2182         false,                                                    // VkBool32 logicOpEnable;
2183         VK_LOGIC_OP_CLEAR,                                        // VkLogicOp logicOp;
2184         0,                                                        // uint32_t attachmentCount;
2185         DE_NULL,                 // const VkPipelineColorBlendAttachmentState* pAttachments;
2186         {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4];
2187     };
2188 
2189     const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
2190         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
2191         DE_NULL,                                         // const void* pNext;
2192         (VkPipelineCreateFlags)0,                        // VkPipelineCreateFlags flags;
2193         static_cast<uint32_t>(shaderCreateInfos.size()), // uint32_t stageCount;
2194         shaderCreateInfos.data(),                        // const VkPipelineShaderStageCreateInfo* pStages;
2195         &vertexInputStateCreateInfo,   // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
2196         &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
2197         (tescX || teseX) ? &tessellationStateCreateInfo :
2198                            DE_NULL,                 // const VkPipelineTessellationStateCreateInfo* pTessellationState;
2199         fragX ? &viewportStateCreateInfo : DE_NULL, // const VkPipelineViewportStateCreateInfo* pViewportState;
2200         &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
2201         fragX ? &multisampleStateCreateInfo : DE_NULL, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
2202         DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
2203         fragX ? &colorBlendStateCreateInfo : DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
2204         DE_NULL,                                      // const VkPipelineDynamicStateCreateInfo* pDynamicState;
2205         pipelineLayout.get(),                         // VkPipelineLayout layout;
2206         renderPass.get(),                             // VkRenderPass renderPass;
2207         0u,                                           // uint32_t subpass;
2208         DE_NULL,                                      // VkPipeline basePipelineHandle;
2209         0                                             // int basePipelineIndex;
2210     };
2211 
2212     pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &graphicsPipelineCreateInfo);
2213 
2214     const VkBufferCreateInfo vertexBufferParams = {
2215         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,                                 // VkStructureType sType;
2216         DE_NULL,                                                              // const void* pNext;
2217         0u,                                                                   // VkBufferCreateFlags flags;
2218         VkDeviceSize(sizeof(tcu::Vec3) * vertices.size()),                    // VkDeviceSize size;
2219         VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
2220         VK_SHARING_MODE_EXCLUSIVE,                                            // VkSharingMode sharingMode;
2221         1u,                                                                   // uint32_t queueFamilyIndexCount;
2222         &queueFamilyIndex                                                     // const uint32_t* pQueueFamilyIndices;
2223     };
2224 
2225     Move<VkBuffer> vertexBuffer;
2226     de::MovePtr<Allocation> vertexAlloc;
2227 
2228     vertexBuffer = createBuffer(vkd, device, &vertexBufferParams);
2229     vertexAlloc =
2230         allocator.allocate(getBufferMemoryRequirements(vkd, device, *vertexBuffer), MemoryRequirement::HostVisible);
2231     VK_CHECK(vkd.bindBufferMemory(device, *vertexBuffer, vertexAlloc->getMemory(), vertexAlloc->getOffset()));
2232 
2233     // Upload vertex data
2234     deMemcpy(vertexAlloc->getHostPtr(), vertices.data(), vertices.size() * sizeof(tcu::Vec3));
2235     flushAlloc(vkd, device, *vertexAlloc);
2236 
2237     RayQueryTestState state(vkd, device, instanceInterface, physDevice, queueFamilyIndex);
2238 
2239     de::MovePtr<BufferWithMemory> rayBuffer;
2240 
2241     if (params.rays.empty() == false)
2242     {
2243         const VkBufferCreateInfo rayBufferCreateInfo =
2244             makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2245         rayBuffer = de::MovePtr<BufferWithMemory>(
2246             new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
2247 
2248         memcpy(rayBuffer->getAllocation().getHostPtr(), &params.rays[0], params.rays.size() * sizeof(Ray));
2249         flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
2250                                rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
2251     }
2252 
2253     const VkQueue queue                     = universalQueue;
2254     const VkFormat imageFormat              = VK_FORMAT_R32G32B32A32_SFLOAT;
2255     const VkImageCreateInfo imageCreateInfo = {
2256         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,       // VkStructureType sType;
2257         DE_NULL,                                   // const void* pNext;
2258         (VkImageCreateFlags)0u,                    // VkImageCreateFlags flags;
2259         VK_IMAGE_TYPE_3D,                          // VkImageType imageType;
2260         imageFormat,                               // VkFormat format;
2261         makeExtent3D(renderSz[0], renderSz[1], 1), // VkExtent3D extent;
2262         1u,                                        // uint32_t mipLevels;
2263         1u,                                        // uint32_t arrayLayers;
2264         VK_SAMPLE_COUNT_1_BIT,                     // VkSampleCountFlagBits samples;
2265         VK_IMAGE_TILING_OPTIMAL,                   // VkImageTiling tiling;
2266         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2267             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
2268         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
2269         0u,                                  // uint32_t queueFamilyIndexCount;
2270         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
2271         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
2272     };
2273     const VkImageSubresourceRange imageSubresourceRange =
2274         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2275     const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
2276         new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
2277     const Move<VkImageView> imageView =
2278         makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_3D, imageFormat, imageSubresourceRange);
2279 
2280     const VkBufferCreateInfo resultBufferCreateInfo =
2281         makeBufferCreateInfo(renderSz[0] * renderSz[1] * 1 * 4 * sizeof(float), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2282     const VkImageSubresourceLayers resultBufferImageSubresourceLayers =
2283         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2284     const VkBufferImageCopy resultBufferImageRegion =
2285         makeBufferImageCopy(makeExtent3D(renderSz[0], renderSz[1], 1), resultBufferImageSubresourceLayers);
2286     de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
2287         new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2288 
2289     const VkDescriptorImageInfo resultImageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2290 
2291     const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
2292     const Move<VkCommandBuffer> cmdBuffer =
2293         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2294 
2295     const VkDescriptorBufferInfo rayBufferDescriptorInfo =
2296         makeDescriptorBufferInfo((*rayBuffer).get(), 0, VK_WHOLE_SIZE);
2297 
2298     beginCommandBuffer(vkd, *cmdBuffer, 0u);
2299     {
2300         const VkImageMemoryBarrier preImageBarrier =
2301             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
2302                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
2303         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2304                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
2305 
2306         const VkClearValue clearValue = makeClearValueColorU32(0xFF, 0u, 0u, 0u);
2307         vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
2308                                &imageSubresourceRange);
2309 
2310         const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
2311             VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
2312             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
2313         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, SHADER_STAGE_ALL_RAY_TRACING,
2314                                       &postImageBarrier);
2315 
2316         // build acceleration structures for ray query
2317         initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer,
2318                                            rayQueryBottomAccelerationStructures, rayQueryTopAccelerationStructure);
2319 
2320         const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr =
2321             rayQueryTopAccelerationStructure.get();
2322         VkWriteDescriptorSetAccelerationStructureKHR rayQueryAccelerationStructureWriteDescriptorSet = {
2323             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
2324             DE_NULL,                                                           //  const void* pNext;
2325             1u,                                                                //  uint32_t accelerationStructureCount;
2326             rayQueryTopLevelAccelerationStructurePtr
2327                 ->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
2328         };
2329 
2330         DescriptorSetUpdateBuilder()
2331             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
2332                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &resultImageInfo)
2333             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
2334                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
2335                          &rayQueryAccelerationStructureWriteDescriptorSet)
2336             .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
2337                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDescriptorInfo)
2338             .update(vkd, device);
2339 
2340         const VkRenderPassBeginInfo renderPassBeginInfo = {
2341             VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // VkStructureType sType;
2342             DE_NULL,                                  // const void* pNext;
2343             renderPass.get(),                         // VkRenderPass renderPass;
2344             framebuffer.get(),                        // VkFramebuffer framebuffer;
2345             makeRect2D(renderSz[0], renderSz[1]),     // VkRect2D renderArea;
2346             0u,                                       // uint32_t clearValueCount;
2347             DE_NULL                                   // const VkClearValue* pClearValues;
2348         };
2349         VkDeviceSize vertexBufferOffset = 0u;
2350 
2351         vkd.cmdBeginRenderPass(*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
2352         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2353         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
2354                                   &descriptorSet.get(), 0u, DE_NULL);
2355         vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer.get(), &vertexBufferOffset);
2356         vkd.cmdDraw(*cmdBuffer, uint32_t(vertices.size()), 1, 0, 0);
2357         vkd.cmdEndRenderPass(*cmdBuffer);
2358 
2359         const VkMemoryBarrier postTestMemoryBarrier =
2360             makeMemoryBarrier(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2361         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, SHADER_STAGE_ALL_RAY_TRACING, VK_PIPELINE_STAGE_TRANSFER_BIT,
2362                                  &postTestMemoryBarrier);
2363 
2364         vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u,
2365                                  &resultBufferImageRegion);
2366     }
2367     endCommandBuffer(vkd, *cmdBuffer);
2368 
2369     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2370 
2371     invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(),
2372                                 resultBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
2373 
2374     rayQueryBottomAccelerationStructures.clear();
2375     rayQueryTopAccelerationStructure.clear();
2376 
2377     std::vector<T> results;
2378     const uint32_t depth = 1;
2379 
2380     // create result image
2381     tcu::TextureFormat imageFormatMapped = vk::mapVkFormat(imageFormat);
2382     tcu::ConstPixelBufferAccess resultAccess(imageFormatMapped, renderSz[0], renderSz[1], depth,
2383                                              resultBuffer->getAllocation().getHostPtr());
2384 
2385     for (uint32_t z = 0; z < depth; z++)
2386     {
2387         for (uint32_t y = 0; y < renderSz[1]; y++)
2388         {
2389             for (uint32_t x = 0; x < renderSz[0]; x++)
2390             {
2391                 tcu::Vec4 pixel = resultAccess.getPixel(x, y, z);
2392                 T resData       = {pixel[0], pixel[1], pixel[2], pixel[3]};
2393                 results.push_back(resData);
2394                 if (results.size() >= params.rays.size())
2395                 {
2396                     return (results);
2397                 }
2398             }
2399         }
2400     }
2401 
2402     return results;
2403 }
2404 
// Declaration only; the definition is not part of this header section.
// Available only when CTS_USES_VULKANSC is not defined (this sits in the
// #ifndef CTS_USES_VULKANSC branch of the header).
//
// programCollection - passed by non-const reference, so the callee may modify it
//                     (presumably shader sources are added to it -- confirm
//                     against the definition).
// params            - ray query test parameters, passed by value.
// rayQueryPart      - string passed by value; NOTE(review): by its name this looks
//                     like a shader source fragment containing the ray query
//                     code -- confirm against the definition.
// max_t             - float; NOTE(review): presumably the maximum ray distance
//                     (t-max) used by the generated shaders -- confirm.
2405 void generateRayQueryShaders(SourceCollections &programCollection, RayQueryTestParams params, std::string rayQueryPart,
2406                              float max_t);
2407 
2408 #else
2409 
// Sole declaration exposed by this header when CTS_USES_VULKANSC is defined
// (the #else branch of the guard that wraps the ray tracing utilities).
// NOTE(review): judging by the name, this is a placeholder so the matching
// translation unit still defines a symbol in Vulkan SC builds -- confirm
// against the definition; the meaning of the returned value is not visible here.
2410 uint32_t rayTracingDefineAnything();
2411 
2412 #endif // CTS_USES_VULKANSC
2413 
2414 } // namespace vk
2415 
2416 #endif // _VKRAYTRACINGUTIL_HPP
2417