1 #ifndef _VKRAYTRACINGUTIL_HPP
2 #define _VKRAYTRACINGUTIL_HPP
3 /*-------------------------------------------------------------------------
4 * Vulkan CTS Framework
5 * --------------------
6 *
7 * Copyright (c) 2020 The Khronos Group Inc.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Vulkan ray tracing utility.
24 *//*--------------------------------------------------------------------*/
25
26 #include "vkDefs.hpp"
27 #include "vkRef.hpp"
28 #include "vkMemUtil.hpp"
29 #include "vkBufferWithMemory.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkPrograms.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkImageUtil.hpp"
38
39 #include "deFloat16.h"
40
41 #include "tcuVector.hpp"
42 #include "tcuVectorType.hpp"
43 #include "tcuTexture.hpp"
44 #include "qpWatchDog.h"
45
46 #include <vector>
47 #include <map>
48 #include <limits>
49 #include <stdexcept>
50
51 namespace vk
52 {
53
54 #ifndef CTS_USES_VULKANSC
55
56 constexpr VkShaderStageFlags SHADER_STAGE_ALL_RAY_TRACING =
57 VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
58 VK_SHADER_STAGE_MISS_BIT_KHR | VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
59
60 const VkTransformMatrixKHR identityMatrix3x4 = {
61 {{1.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 1.0f, 0.0f}}};
62
63 template <typename T>
makeVkSharedPtr(Move<T> move)64 inline de::SharedPtr<Move<T>> makeVkSharedPtr(Move<T> move)
65 {
66 return de::SharedPtr<Move<T>>(new Move<T>(move));
67 }
68
69 template <typename T>
makeVkSharedPtr(de::MovePtr<T> movePtr)70 inline de::SharedPtr<de::MovePtr<T>> makeVkSharedPtr(de::MovePtr<T> movePtr)
71 {
72 return de::SharedPtr<de::MovePtr<T>>(new de::MovePtr<T>(movePtr));
73 }
74
// Returns the given GLSL source unchanged (currently a pass-through hook).
inline std::string updateRayTracingGLSL(const std::string &str)
{
    std::string result(str);
    return result;
}
79
80 std::string getCommonRayGenerationShader(void);
81
82 // Get lowercase version of the format name with no VK_FORMAT_ prefix.
83 std::string getFormatSimpleName(vk::VkFormat format);
84
85 // Test whether given poin p belons to the triangle (p0, p1, p2)
86 bool pointInTriangle2D(const tcu::Vec3 &p, const tcu::Vec3 &p0, const tcu::Vec3 &p1, const tcu::Vec3 &p2);
87
88 // Checks the given vertex buffer format is valid for acceleration structures.
89 // Note: VK_KHR_get_physical_device_properties2 and VK_KHR_acceleration_structure are supposed to be supported.
90 void checkAccelerationStructureVertexBufferFormat(const vk::InstanceInterface &vki, vk::VkPhysicalDevice physicalDevice,
91 vk::VkFormat format);
92
93 class RaytracedGeometryBase
94 {
95 public:
96 RaytracedGeometryBase() = delete;
97 RaytracedGeometryBase(const RaytracedGeometryBase &geometry) = delete;
98 RaytracedGeometryBase(VkGeometryTypeKHR geometryType, VkFormat vertexFormat, VkIndexType indexType);
99 virtual ~RaytracedGeometryBase();
100
getGeometryType(void) const101 inline VkGeometryTypeKHR getGeometryType(void) const
102 {
103 return m_geometryType;
104 }
isTrianglesType(void) const105 inline bool isTrianglesType(void) const
106 {
107 return m_geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR;
108 }
getVertexFormat(void) const109 inline VkFormat getVertexFormat(void) const
110 {
111 return m_vertexFormat;
112 }
getIndexType(void) const113 inline VkIndexType getIndexType(void) const
114 {
115 return m_indexType;
116 }
usesIndices(void) const117 inline bool usesIndices(void) const
118 {
119 return m_indexType != VK_INDEX_TYPE_NONE_KHR;
120 }
getGeometryFlags(void) const121 inline VkGeometryFlagsKHR getGeometryFlags(void) const
122 {
123 return m_geometryFlags;
124 }
setGeometryFlags(const VkGeometryFlagsKHR geometryFlags)125 inline void setGeometryFlags(const VkGeometryFlagsKHR geometryFlags)
126 {
127 m_geometryFlags = geometryFlags;
128 }
getOpacityMicromap(void)129 inline VkAccelerationStructureTrianglesOpacityMicromapEXT &getOpacityMicromap(void)
130 {
131 return m_opacityGeometryMicromap;
132 }
getHasOpacityMicromap(void) const133 inline bool getHasOpacityMicromap(void) const
134 {
135 return m_hasOpacityMicromap;
136 }
setOpacityMicromap(const VkAccelerationStructureTrianglesOpacityMicromapEXT * opacityGeometryMicromap)137 inline void setOpacityMicromap(const VkAccelerationStructureTrianglesOpacityMicromapEXT *opacityGeometryMicromap)
138 {
139 m_hasOpacityMicromap = true;
140 m_opacityGeometryMicromap = *opacityGeometryMicromap;
141 }
142 virtual uint32_t getVertexCount(void) const = 0;
143 virtual const uint8_t *getVertexPointer(void) const = 0;
144 virtual VkDeviceSize getVertexStride(void) const = 0;
145 virtual VkDeviceSize getAABBStride(void) const = 0;
146 virtual size_t getVertexByteSize(void) const = 0;
147 virtual uint32_t getIndexCount(void) const = 0;
148 virtual const uint8_t *getIndexPointer(void) const = 0;
149 virtual VkDeviceSize getIndexStride(void) const = 0;
150 virtual size_t getIndexByteSize(void) const = 0;
151 virtual uint32_t getPrimitiveCount(void) const = 0;
152 virtual void addVertex(const tcu::Vec3 &vertex) = 0;
153 virtual void addIndex(const uint32_t &index) = 0;
154
155 private:
156 VkGeometryTypeKHR m_geometryType;
157 VkFormat m_vertexFormat;
158 VkIndexType m_indexType;
159 VkGeometryFlagsKHR m_geometryFlags;
160 bool m_hasOpacityMicromap;
161 VkAccelerationStructureTrianglesOpacityMicromapEXT m_opacityGeometryMicromap;
162 };
163
164 template <typename T>
convertSatRte(float f)165 inline T convertSatRte(float f)
166 {
167 // \note Doesn't work for 64-bit types
168 DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
169 DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
170
171 int64_t minVal = std::numeric_limits<T>::min();
172 int64_t maxVal = std::numeric_limits<T>::max();
173 float q = deFloatFrac(f);
174 int64_t intVal = (int64_t)(f - q);
175
176 // Rounding.
177 if (q == 0.5f)
178 {
179 if (intVal % 2 != 0)
180 intVal++;
181 }
182 else if (q > 0.5f)
183 intVal++;
184 // else Don't add anything
185
186 // Saturate.
187 intVal = de::max(minVal, de::min(maxVal, intVal));
188
189 return (T)intVal;
190 }
191
192 // Converts float to signed integer with variable width.
193 // Source float is assumed to be in the [-1, 1] range.
194 template <typename T>
deFloat32ToSNorm(float src)195 inline T deFloat32ToSNorm(float src)
196 {
197 DE_STATIC_ASSERT(std::numeric_limits<T>::is_integer && std::numeric_limits<T>::is_signed);
198 const T range = std::numeric_limits<T>::max();
199 const T intVal = convertSatRte<T>(src * static_cast<float>(range));
200 return de::clamp<T>(intVal, -range, range);
201 }
202
203 typedef tcu::Vector<deFloat16, 2> Vec2_16;
204 typedef tcu::Vector<deFloat16, 3> Vec3_16;
205 typedef tcu::Vector<deFloat16, 4> Vec4_16;
206 typedef tcu::Vector<int16_t, 2> Vec2_16SNorm;
207 typedef tcu::Vector<int16_t, 3> Vec3_16SNorm;
208 typedef tcu::Vector<int16_t, 4> Vec4_16SNorm;
209 typedef tcu::Vector<int8_t, 2> Vec2_8SNorm;
210 typedef tcu::Vector<int8_t, 3> Vec3_8SNorm;
211 typedef tcu::Vector<int8_t, 4> Vec4_8SNorm;
212
213 template <typename V>
214 VkFormat vertexFormatFromType();
215 template <>
vertexFormatFromType()216 inline VkFormat vertexFormatFromType<tcu::Vec2>()
217 {
218 return VK_FORMAT_R32G32_SFLOAT;
219 }
220 template <>
vertexFormatFromType()221 inline VkFormat vertexFormatFromType<tcu::Vec3>()
222 {
223 return VK_FORMAT_R32G32B32_SFLOAT;
224 }
225 template <>
vertexFormatFromType()226 inline VkFormat vertexFormatFromType<tcu::Vec4>()
227 {
228 return VK_FORMAT_R32G32B32A32_SFLOAT;
229 }
230 template <>
vertexFormatFromType()231 inline VkFormat vertexFormatFromType<Vec2_16>()
232 {
233 return VK_FORMAT_R16G16_SFLOAT;
234 }
235 template <>
vertexFormatFromType()236 inline VkFormat vertexFormatFromType<Vec3_16>()
237 {
238 return VK_FORMAT_R16G16B16_SFLOAT;
239 }
240 template <>
vertexFormatFromType()241 inline VkFormat vertexFormatFromType<Vec4_16>()
242 {
243 return VK_FORMAT_R16G16B16A16_SFLOAT;
244 }
245 template <>
vertexFormatFromType()246 inline VkFormat vertexFormatFromType<Vec2_16SNorm>()
247 {
248 return VK_FORMAT_R16G16_SNORM;
249 }
250 template <>
vertexFormatFromType()251 inline VkFormat vertexFormatFromType<Vec3_16SNorm>()
252 {
253 return VK_FORMAT_R16G16B16_SNORM;
254 }
255 template <>
vertexFormatFromType()256 inline VkFormat vertexFormatFromType<Vec4_16SNorm>()
257 {
258 return VK_FORMAT_R16G16B16A16_SNORM;
259 }
260 template <>
vertexFormatFromType()261 inline VkFormat vertexFormatFromType<tcu::DVec2>()
262 {
263 return VK_FORMAT_R64G64_SFLOAT;
264 }
265 template <>
vertexFormatFromType()266 inline VkFormat vertexFormatFromType<tcu::DVec3>()
267 {
268 return VK_FORMAT_R64G64B64_SFLOAT;
269 }
270 template <>
vertexFormatFromType()271 inline VkFormat vertexFormatFromType<tcu::DVec4>()
272 {
273 return VK_FORMAT_R64G64B64A64_SFLOAT;
274 }
275 template <>
vertexFormatFromType()276 inline VkFormat vertexFormatFromType<Vec2_8SNorm>()
277 {
278 return VK_FORMAT_R8G8_SNORM;
279 }
280 template <>
vertexFormatFromType()281 inline VkFormat vertexFormatFromType<Vec3_8SNorm>()
282 {
283 return VK_FORMAT_R8G8B8_SNORM;
284 }
285 template <>
vertexFormatFromType()286 inline VkFormat vertexFormatFromType<Vec4_8SNorm>()
287 {
288 return VK_FORMAT_R8G8B8A8_SNORM;
289 }
290
291 struct EmptyIndex
292 {
293 };
294 template <typename I>
295 VkIndexType indexTypeFromType();
296 template <>
indexTypeFromType()297 inline VkIndexType indexTypeFromType<uint16_t>()
298 {
299 return VK_INDEX_TYPE_UINT16;
300 }
301 template <>
indexTypeFromType()302 inline VkIndexType indexTypeFromType<uint32_t>()
303 {
304 return VK_INDEX_TYPE_UINT32;
305 }
306 template <>
indexTypeFromType()307 inline VkIndexType indexTypeFromType<EmptyIndex>()
308 {
309 return VK_INDEX_TYPE_NONE_KHR;
310 }
311
312 template <typename V>
313 V convertFloatTo(const tcu::Vec3 &vertex);
314 template <>
convertFloatTo(const tcu::Vec3 & vertex)315 inline tcu::Vec2 convertFloatTo<tcu::Vec2>(const tcu::Vec3 &vertex)
316 {
317 return tcu::Vec2(vertex.x(), vertex.y());
318 }
319 template <>
convertFloatTo(const tcu::Vec3 & vertex)320 inline tcu::Vec3 convertFloatTo<tcu::Vec3>(const tcu::Vec3 &vertex)
321 {
322 return vertex;
323 }
324 template <>
convertFloatTo(const tcu::Vec3 & vertex)325 inline tcu::Vec4 convertFloatTo<tcu::Vec4>(const tcu::Vec3 &vertex)
326 {
327 return tcu::Vec4(vertex.x(), vertex.y(), vertex.z(), 0.0f);
328 }
329 template <>
convertFloatTo(const tcu::Vec3 & vertex)330 inline Vec2_16 convertFloatTo<Vec2_16>(const tcu::Vec3 &vertex)
331 {
332 return Vec2_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()));
333 }
334 template <>
convertFloatTo(const tcu::Vec3 & vertex)335 inline Vec3_16 convertFloatTo<Vec3_16>(const tcu::Vec3 &vertex)
336 {
337 return Vec3_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()), deFloat32To16(vertex.z()));
338 }
339 template <>
convertFloatTo(const tcu::Vec3 & vertex)340 inline Vec4_16 convertFloatTo<Vec4_16>(const tcu::Vec3 &vertex)
341 {
342 return Vec4_16(deFloat32To16(vertex.x()), deFloat32To16(vertex.y()), deFloat32To16(vertex.z()),
343 deFloat32To16(0.0f));
344 }
345 template <>
convertFloatTo(const tcu::Vec3 & vertex)346 inline Vec2_16SNorm convertFloatTo<Vec2_16SNorm>(const tcu::Vec3 &vertex)
347 {
348 return Vec2_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()));
349 }
350 template <>
convertFloatTo(const tcu::Vec3 & vertex)351 inline Vec3_16SNorm convertFloatTo<Vec3_16SNorm>(const tcu::Vec3 &vertex)
352 {
353 return Vec3_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()),
354 deFloat32ToSNorm<int16_t>(vertex.z()));
355 }
356 template <>
convertFloatTo(const tcu::Vec3 & vertex)357 inline Vec4_16SNorm convertFloatTo<Vec4_16SNorm>(const tcu::Vec3 &vertex)
358 {
359 return Vec4_16SNorm(deFloat32ToSNorm<int16_t>(vertex.x()), deFloat32ToSNorm<int16_t>(vertex.y()),
360 deFloat32ToSNorm<int16_t>(vertex.z()), deFloat32ToSNorm<int16_t>(0.0f));
361 }
362 template <>
convertFloatTo(const tcu::Vec3 & vertex)363 inline tcu::DVec2 convertFloatTo<tcu::DVec2>(const tcu::Vec3 &vertex)
364 {
365 return tcu::DVec2(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()));
366 }
367 template <>
convertFloatTo(const tcu::Vec3 & vertex)368 inline tcu::DVec3 convertFloatTo<tcu::DVec3>(const tcu::Vec3 &vertex)
369 {
370 return tcu::DVec3(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()),
371 static_cast<double>(vertex.z()));
372 }
373 template <>
convertFloatTo(const tcu::Vec3 & vertex)374 inline tcu::DVec4 convertFloatTo<tcu::DVec4>(const tcu::Vec3 &vertex)
375 {
376 return tcu::DVec4(static_cast<double>(vertex.x()), static_cast<double>(vertex.y()), static_cast<double>(vertex.z()),
377 0.0);
378 }
379 template <>
convertFloatTo(const tcu::Vec3 & vertex)380 inline Vec2_8SNorm convertFloatTo<Vec2_8SNorm>(const tcu::Vec3 &vertex)
381 {
382 return Vec2_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()));
383 }
384 template <>
convertFloatTo(const tcu::Vec3 & vertex)385 inline Vec3_8SNorm convertFloatTo<Vec3_8SNorm>(const tcu::Vec3 &vertex)
386 {
387 return Vec3_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()),
388 deFloat32ToSNorm<int8_t>(vertex.z()));
389 }
390 template <>
convertFloatTo(const tcu::Vec3 & vertex)391 inline Vec4_8SNorm convertFloatTo<Vec4_8SNorm>(const tcu::Vec3 &vertex)
392 {
393 return Vec4_8SNorm(deFloat32ToSNorm<int8_t>(vertex.x()), deFloat32ToSNorm<int8_t>(vertex.y()),
394 deFloat32ToSNorm<int8_t>(vertex.z()), deFloat32ToSNorm<int8_t>(0.0f));
395 }
396
397 template <typename V>
398 V convertIndexTo(uint32_t index);
399 template <>
convertIndexTo(uint32_t index)400 inline EmptyIndex convertIndexTo<EmptyIndex>(uint32_t index)
401 {
402 DE_UNREF(index);
403 TCU_THROW(TestError, "Cannot add empty index");
404 }
405 template <>
convertIndexTo(uint32_t index)406 inline uint16_t convertIndexTo<uint16_t>(uint32_t index)
407 {
408 return static_cast<uint16_t>(index);
409 }
410 template <>
convertIndexTo(uint32_t index)411 inline uint32_t convertIndexTo<uint32_t>(uint32_t index)
412 {
413 return index;
414 }
415
416 template <typename V, typename I>
417 class RaytracedGeometry : public RaytracedGeometryBase
418 {
419 public:
420 RaytracedGeometry() = delete;
421 RaytracedGeometry(const RaytracedGeometry &geometry) = delete;
422 RaytracedGeometry(VkGeometryTypeKHR geometryType, uint32_t paddingBlocks = 0u);
423 RaytracedGeometry(VkGeometryTypeKHR geometryType, const std::vector<V> &vertices,
424 const std::vector<I> &indices = std::vector<I>(), uint32_t paddingBlocks = 0u);
425
426 uint32_t getVertexCount(void) const override;
427 const uint8_t *getVertexPointer(void) const override;
428 VkDeviceSize getVertexStride(void) const override;
429 VkDeviceSize getAABBStride(void) const override;
430 size_t getVertexByteSize(void) const override;
431 uint32_t getIndexCount(void) const override;
432 const uint8_t *getIndexPointer(void) const override;
433 VkDeviceSize getIndexStride(void) const override;
434 size_t getIndexByteSize(void) const override;
435 uint32_t getPrimitiveCount(void) const override;
436
437 void addVertex(const tcu::Vec3 &vertex) override;
438 void addIndex(const uint32_t &index) override;
439
440 private:
441 void init(); // To be run in constructors.
442 void checkGeometryType() const; // Checks geometry type is valid.
443 void calcBlockSize(); // Calculates and saves vertex buffer block size.
444 size_t getBlockSize() const; // Return stored vertex buffer block size.
445 void addNativeVertex(const V &vertex); // Adds new vertex in native format.
446
447 // The implementation below stores vertices as byte blocks to take the requested padding into account. m_vertices is the array
448 // of bytes containing vertex data.
449 //
450 // For triangles, the padding block has a size that is a multiple of the vertex size and each vertex is stored in a byte block
451 // equivalent to:
452 //
453 // struct Vertex
454 // {
455 // V vertex;
456 // uint8_t padding[m_paddingBlocks * sizeof(V)];
457 // };
458 //
459 // For AABBs, the padding block has a size that is a multiple of kAABBPadBaseSize (see below) and vertices are stored in pairs
460 // before the padding block. This is equivalent to:
461 //
462 // struct VertexPair
463 // {
464 // V vertices[2];
465 // uint8_t padding[m_paddingBlocks * kAABBPadBaseSize];
466 // };
467 //
468 // The size of each pseudo-structure above is saved to one of the correspoding union members below.
469 union BlockSize
470 {
471 size_t trianglesBlockSize;
472 size_t aabbsBlockSize;
473 };
474
475 const uint32_t m_paddingBlocks;
476 size_t m_vertexCount;
477 std::vector<uint8_t> m_vertices; // Vertices are stored as byte blocks.
478 std::vector<I> m_indices; // Indices are stored natively.
479 BlockSize m_blockSize; // For m_vertices.
480
481 // Data sizes.
482 static constexpr size_t kVertexSize = sizeof(V);
483 static constexpr size_t kIndexSize = sizeof(I);
484 static constexpr size_t kAABBPadBaseSize = 8; // As required by the spec.
485 };
486
487 template <typename V, typename I>
RaytracedGeometry(VkGeometryTypeKHR geometryType,uint32_t paddingBlocks)488 RaytracedGeometry<V, I>::RaytracedGeometry(VkGeometryTypeKHR geometryType, uint32_t paddingBlocks)
489 : RaytracedGeometryBase(geometryType, vertexFormatFromType<V>(), indexTypeFromType<I>())
490 , m_paddingBlocks(paddingBlocks)
491 , m_vertexCount(0)
492 {
493 init();
494 }
495
496 template <typename V, typename I>
RaytracedGeometry(VkGeometryTypeKHR geometryType,const std::vector<V> & vertices,const std::vector<I> & indices,uint32_t paddingBlocks)497 RaytracedGeometry<V, I>::RaytracedGeometry(VkGeometryTypeKHR geometryType, const std::vector<V> &vertices,
498 const std::vector<I> &indices, uint32_t paddingBlocks)
499 : RaytracedGeometryBase(geometryType, vertexFormatFromType<V>(), indexTypeFromType<I>())
500 , m_paddingBlocks(paddingBlocks)
501 , m_vertexCount(0)
502 , m_vertices()
503 , m_indices(indices)
504 {
505 init();
506 for (const auto &vertex : vertices)
507 addNativeVertex(vertex);
508 }
509
510 template <typename V, typename I>
getVertexCount(void) const511 uint32_t RaytracedGeometry<V, I>::getVertexCount(void) const
512 {
513 return (isTrianglesType() ? static_cast<uint32_t>(m_vertexCount) : 0u);
514 }
515
516 template <typename V, typename I>
getVertexPointer(void) const517 const uint8_t *RaytracedGeometry<V, I>::getVertexPointer(void) const
518 {
519 DE_ASSERT(!m_vertices.empty());
520 return reinterpret_cast<const uint8_t *>(m_vertices.data());
521 }
522
523 template <typename V, typename I>
getVertexStride(void) const524 VkDeviceSize RaytracedGeometry<V, I>::getVertexStride(void) const
525 {
526 return ((!isTrianglesType()) ? 0ull : static_cast<VkDeviceSize>(getBlockSize()));
527 }
528
529 template <typename V, typename I>
getAABBStride(void) const530 VkDeviceSize RaytracedGeometry<V, I>::getAABBStride(void) const
531 {
532 return (isTrianglesType() ? 0ull : static_cast<VkDeviceSize>(getBlockSize()));
533 }
534
535 template <typename V, typename I>
getVertexByteSize(void) const536 size_t RaytracedGeometry<V, I>::getVertexByteSize(void) const
537 {
538 return m_vertices.size();
539 }
540
541 template <typename V, typename I>
getIndexCount(void) const542 uint32_t RaytracedGeometry<V, I>::getIndexCount(void) const
543 {
544 return static_cast<uint32_t>(isTrianglesType() ? m_indices.size() : 0);
545 }
546
547 template <typename V, typename I>
getIndexPointer(void) const548 const uint8_t *RaytracedGeometry<V, I>::getIndexPointer(void) const
549 {
550 const auto indexCount = getIndexCount();
551 DE_UNREF(indexCount); // For release builds.
552 DE_ASSERT(indexCount > 0u);
553
554 return reinterpret_cast<const uint8_t *>(m_indices.data());
555 }
556
557 template <typename V, typename I>
getIndexStride(void) const558 VkDeviceSize RaytracedGeometry<V, I>::getIndexStride(void) const
559 {
560 return static_cast<VkDeviceSize>(kIndexSize);
561 }
562
563 template <typename V, typename I>
getIndexByteSize(void) const564 size_t RaytracedGeometry<V, I>::getIndexByteSize(void) const
565 {
566 const auto indexCount = getIndexCount();
567 DE_ASSERT(indexCount > 0u);
568
569 return (indexCount * kIndexSize);
570 }
571
572 template <typename V, typename I>
getPrimitiveCount(void) const573 uint32_t RaytracedGeometry<V, I>::getPrimitiveCount(void) const
574 {
575 return static_cast<uint32_t>(isTrianglesType() ? (usesIndices() ? m_indices.size() / 3 : m_vertexCount / 3) :
576 (m_vertexCount / 2));
577 }
578
579 template <typename V, typename I>
addVertex(const tcu::Vec3 & vertex)580 void RaytracedGeometry<V, I>::addVertex(const tcu::Vec3 &vertex)
581 {
582 addNativeVertex(convertFloatTo<V>(vertex));
583 }
584
585 template <typename V, typename I>
addNativeVertex(const V & vertex)586 void RaytracedGeometry<V, I>::addNativeVertex(const V &vertex)
587 {
588 const auto oldSize = m_vertices.size();
589 const auto blockSize = getBlockSize();
590
591 if (isTrianglesType())
592 {
593 // Reserve new block, copy vertex at the beginning of the new block.
594 m_vertices.resize(oldSize + blockSize, uint8_t{0});
595 deMemcpy(&m_vertices[oldSize], &vertex, kVertexSize);
596 }
597 else // AABB
598 {
599 if (m_vertexCount % 2 == 0)
600 {
601 // New block needed.
602 m_vertices.resize(oldSize + blockSize, uint8_t{0});
603 deMemcpy(&m_vertices[oldSize], &vertex, kVertexSize);
604 }
605 else
606 {
607 // Insert in the second position of last existing block.
608 //
609 // Vertex Size
610 // +-------+
611 // +-------------+------------+----------------------------------------+
612 // | | | ... | vertex vertex padding |
613 // +-------------+------------+----------------+-----------------------+
614 // +-----------------------+
615 // Block Size
616 // +-------------------------------------------------------------------+
617 // Old Size
618 //
619 deMemcpy(&m_vertices[oldSize - blockSize + kVertexSize], &vertex, kVertexSize);
620 }
621 }
622
623 ++m_vertexCount;
624 }
625
626 template <typename V, typename I>
addIndex(const uint32_t & index)627 void RaytracedGeometry<V, I>::addIndex(const uint32_t &index)
628 {
629 m_indices.push_back(convertIndexTo<I>(index));
630 }
631
632 template <typename V, typename I>
init()633 void RaytracedGeometry<V, I>::init()
634 {
635 checkGeometryType();
636 calcBlockSize();
637 }
638
639 template <typename V, typename I>
checkGeometryType() const640 void RaytracedGeometry<V, I>::checkGeometryType() const
641 {
642 const auto geometryType = getGeometryType();
643 DE_UNREF(geometryType); // For release builds.
644 DE_ASSERT(geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR || geometryType == VK_GEOMETRY_TYPE_AABBS_KHR);
645 }
646
647 template <typename V, typename I>
calcBlockSize()648 void RaytracedGeometry<V, I>::calcBlockSize()
649 {
650 if (isTrianglesType())
651 m_blockSize.trianglesBlockSize = kVertexSize * static_cast<size_t>(1u + m_paddingBlocks);
652 else
653 m_blockSize.aabbsBlockSize = 2 * kVertexSize + m_paddingBlocks * kAABBPadBaseSize;
654 }
655
656 template <typename V, typename I>
getBlockSize() const657 size_t RaytracedGeometry<V, I>::getBlockSize() const
658 {
659 return (isTrianglesType() ? m_blockSize.trianglesBlockSize : m_blockSize.aabbsBlockSize);
660 }
661
662 de::SharedPtr<RaytracedGeometryBase> makeRaytracedGeometry(VkGeometryTypeKHR geometryType, VkFormat vertexFormat,
663 VkIndexType indexType, bool padVertices = false);
664
665 VkDeviceAddress getBufferDeviceAddress(const DeviceInterface &vkd, const VkDevice device, const VkBuffer buffer,
666 VkDeviceSize offset);
667
668 // type used for creating a deep serialization/deserialization of top-level acceleration structures
669 class SerialInfo
670 {
671 std::vector<uint64_t> m_addresses;
672 std::vector<VkDeviceSize> m_sizes;
673
674 public:
675 SerialInfo() = default;
676
677 // addresses: { (owner-top-level AS address) [, (first bottom_level AS address), (second bottom_level AS address), ...] }
678 // sizes: { (owner-top-level AS serial size) [, (first bottom_level AS serial size), (second bottom_level AS serial size), ...] }
SerialInfo(const std::vector<uint64_t> & addresses,const std::vector<VkDeviceSize> & sizes)679 SerialInfo(const std::vector<uint64_t> &addresses, const std::vector<VkDeviceSize> &sizes)
680 : m_addresses(addresses)
681 , m_sizes(sizes)
682 {
683 DE_ASSERT(!addresses.empty() && addresses.size() == sizes.size());
684 }
685
addresses() const686 const std::vector<uint64_t> &addresses() const
687 {
688 return m_addresses;
689 }
sizes() const690 const std::vector<VkDeviceSize> &sizes() const
691 {
692 return m_sizes;
693 }
694 };
695
696 class SerialStorage
697 {
698 public:
699 enum
700 {
701 DE_SERIALIZED_FIELD(
702 DRIVER_UUID, VK_UUID_SIZE), // VK_UUID_SIZE bytes of data matching VkPhysicalDeviceIDProperties::driverUUID
703 DE_SERIALIZED_FIELD(
704 COMPAT_UUID,
705 VK_UUID_SIZE), // VK_UUID_SIZE bytes of data identifying the compatibility for comparison using vkGetDeviceAccelerationStructureCompatibilityKHR
706 DE_SERIALIZED_FIELD(
707 SERIALIZED_SIZE,
708 sizeof(
709 uint64_t)), // A 64-bit integer of the total size matching the value queried using VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR
710 DE_SERIALIZED_FIELD(
711 DESERIALIZED_SIZE,
712 sizeof(
713 uint64_t)), // A 64-bit integer of the deserialized size to be passed in to VkAccelerationStructureCreateInfoKHR::size
714 DE_SERIALIZED_FIELD(
715 HANDLES_COUNT,
716 sizeof(
717 uint64_t)), // A 64-bit integer of the count of the number of acceleration structure handles following. This will be zero for a bottom-level acceleration structure.
718 SERIAL_STORAGE_SIZE_MIN
719 };
720
721 // An old fashion C-style structure that simplifies an access to the AS header
722 struct alignas(16) AccelerationStructureHeader
723 {
724 union
725 {
726 struct
727 {
728 uint8_t driverUUID[VK_UUID_SIZE];
729 uint8_t compactUUID[VK_UUID_SIZE];
730 };
731 uint8_t uuids[VK_UUID_SIZE * 2];
732 };
733 uint64_t serializedSize;
734 uint64_t deserializedSize;
735 uint64_t handleCount;
736 VkDeviceAddress handleArray[1];
737 };
738
739 SerialStorage() = delete;
740 SerialStorage(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
741 const VkAccelerationStructureBuildTypeKHR buildType, const VkDeviceSize storageSize);
742 // An additional constructor for creating a deep copy of top-level AS's.
743 SerialStorage(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
744 const VkAccelerationStructureBuildTypeKHR buildType, const SerialInfo &SerialInfo);
745
746 // below methods will return host addres if AS was build on cpu and device addres when it was build on gpu
747 VkDeviceOrHostAddressKHR getAddress(const DeviceInterface &vk, const VkDevice device,
748 const VkAccelerationStructureBuildTypeKHR buildType);
749 VkDeviceOrHostAddressConstKHR getAddressConst(const DeviceInterface &vk, const VkDevice device,
750 const VkAccelerationStructureBuildTypeKHR buildType);
751
752 // this methods retun host address regardless of where AS was built
753 VkDeviceOrHostAddressKHR getHostAddress(VkDeviceSize offset = 0);
754 VkDeviceOrHostAddressConstKHR getHostAddressConst(VkDeviceSize offset = 0);
755
756 // works the similar way as getHostAddressConst() but returns more readable/intuitive object
757 AccelerationStructureHeader *getASHeader();
758 bool hasDeepFormat() const;
759 de::SharedPtr<SerialStorage> getBottomStorage(uint32_t index) const;
760
761 VkDeviceSize getStorageSize() const;
762 const SerialInfo &getSerialInfo() const;
763 uint64_t getDeserializedSize();
764
765 protected:
766 const VkAccelerationStructureBuildTypeKHR m_buildType;
767 const VkDeviceSize m_storageSize;
768 const SerialInfo m_serialInfo;
769 de::MovePtr<BufferWithMemory> m_buffer;
770 std::vector<de::SharedPtr<SerialStorage>> m_bottoms;
771 };
772
773 class BottomLevelAccelerationStructure
774 {
775 public:
776 static uint32_t getRequiredAllocationCount(void);
777
778 BottomLevelAccelerationStructure();
779 BottomLevelAccelerationStructure(const BottomLevelAccelerationStructure &other) = delete;
780 virtual ~BottomLevelAccelerationStructure();
781
782 virtual void setGeometryData(const std::vector<tcu::Vec3> &geometryData, const bool triangles,
783 const VkGeometryFlagsKHR geometryFlags = 0u);
784 virtual void setDefaultGeometryData(const VkShaderStageFlagBits testStage,
785 const VkGeometryFlagsKHR geometryFlags = 0u);
786 virtual void setGeometryCount(const size_t geometryCount);
787 virtual void addGeometry(de::SharedPtr<RaytracedGeometryBase> &raytracedGeometry);
788 virtual void addGeometry(
789 const std::vector<tcu::Vec3> &geometryData, const bool triangles, const VkGeometryFlagsKHR geometryFlags = 0u,
790 const VkAccelerationStructureTrianglesOpacityMicromapEXT *opacityGeometryMicromap = DE_NULL);
791
792 virtual void setBuildType(const VkAccelerationStructureBuildTypeKHR buildType) = DE_NULL;
793 virtual VkAccelerationStructureBuildTypeKHR getBuildType() const = 0;
794 virtual void setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags) = DE_NULL;
795 virtual void setCreateGeneric(bool createGeneric) = 0;
796 virtual void setCreationBufferUnbounded(bool creationBufferUnbounded) = 0;
797 virtual void setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags) = DE_NULL;
798 virtual void setBuildWithoutGeometries(bool buildWithoutGeometries) = 0;
799 virtual void setBuildWithoutPrimitives(bool buildWithoutPrimitives) = 0;
800 virtual void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0u) = DE_NULL;
801 virtual void setUseArrayOfPointers(const bool useArrayOfPointers) = DE_NULL;
802 virtual void setUseMaintenance5(const bool useMaintenance5) = DE_NULL;
803 virtual void setIndirectBuildParameters(const VkBuffer indirectBuffer, const VkDeviceSize indirectBufferOffset,
804 const uint32_t indirectBufferStride) = DE_NULL;
805 virtual VkBuildAccelerationStructureFlagsKHR getBuildFlags() const = DE_NULL;
806 VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const;
807
808 // methods specific for each acceleration structure
809 virtual void create(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
810 VkDeviceSize structureSize, VkDeviceAddress deviceAddress = 0u, const void *pNext = DE_NULL,
811 const MemoryRequirement &addMemoryRequirement = MemoryRequirement::Any,
812 const VkBuffer creationBuffer = VK_NULL_HANDLE,
813 const VkDeviceSize creationBufferSize = 0u) = DE_NULL;
814 virtual void build(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
815 BottomLevelAccelerationStructure *srcAccelerationStructure = DE_NULL) = DE_NULL;
816 virtual void copyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
817 BottomLevelAccelerationStructure *accelerationStructure, bool compactCopy) = DE_NULL;
818
819 virtual void serialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
820 SerialStorage *storage) = DE_NULL;
821 virtual void deserialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
822 SerialStorage *storage) = DE_NULL;
823
824 // helper methods for typical acceleration structure creation tasks
825 void createAndBuild(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
826 Allocator &allocator, VkDeviceAddress deviceAddress = 0u);
827 void createAndCopyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
828 Allocator &allocator, BottomLevelAccelerationStructure *accelerationStructure,
829 VkDeviceSize compactCopySize = 0u, VkDeviceAddress deviceAddress = 0u);
830 void createAndDeserializeFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
831 Allocator &allocator, SerialStorage *storage, VkDeviceAddress deviceAddress = 0u);
832 virtual const VkAccelerationStructureKHR *getPtr(void) const = DE_NULL;
833 virtual void updateGeometry(size_t geometryIndex,
834 de::SharedPtr<RaytracedGeometryBase> &raytracedGeometry) = DE_NULL;
835
836 protected:
837 std::vector<de::SharedPtr<RaytracedGeometryBase>> m_geometriesData;
838 VkDeviceSize m_structureSize;
839 VkDeviceSize m_updateScratchSize;
840 VkDeviceSize m_buildScratchSize;
841 };
842
843 de::MovePtr<BottomLevelAccelerationStructure> makeBottomLevelAccelerationStructure();
844
845 /**
846 * @brief Implements a pool of BottomLevelAccelerationStructure
847 */
848 class BottomLevelAccelerationStructurePool
849 {
850 public:
851 typedef de::SharedPtr<BottomLevelAccelerationStructure> BlasPtr;
852 struct BlasInfo
853 {
854 VkDeviceSize structureSize;
855 VkDeviceAddress deviceAddress;
856 };
857
858 BottomLevelAccelerationStructurePool();
859 virtual ~BottomLevelAccelerationStructurePool();
860
at(uint32_t index) const861 BlasPtr at(uint32_t index) const
862 {
863 return m_structs[index];
864 }
operator [](uint32_t index) const865 BlasPtr operator[](uint32_t index) const
866 {
867 return m_structs[index];
868 }
structures() const869 auto structures() const -> const std::vector<BlasPtr> &
870 {
871 return m_structs;
872 }
structCount() const873 uint32_t structCount() const
874 {
875 return static_cast<uint32_t>(m_structs.size());
876 }
877
878 // defines how many structures will be packet in single buffer
batchStructCount() const879 uint32_t batchStructCount() const
880 {
881 return m_batchStructCount;
882 }
883 void batchStructCount(const uint32_t &value);
884
885 // defines how many geometries (vertices and/or indices) will be packet in single buffer
batchGeomCount() const886 uint32_t batchGeomCount() const
887 {
888 return m_batchGeomCount;
889 }
batchGeomCount(const uint32_t & value)890 void batchGeomCount(const uint32_t &value)
891 {
892 m_batchGeomCount = value;
893 }
894
tryCachedMemory() const895 bool tryCachedMemory() const
896 {
897 return m_tryCachedMemory;
898 }
tryCachedMemory(const bool cachedMemory)899 void tryCachedMemory(const bool cachedMemory)
900 {
901 m_tryCachedMemory = cachedMemory;
902 }
903
904 BlasPtr add(VkDeviceSize structureSize = 0, VkDeviceAddress deviceAddress = 0);
905 /**
906 * @brief Creates previously added bottoms at a time.
907 * @note All geometries must be known before call this method.
908 */
909 void batchCreate(const DeviceInterface &vkd, const VkDevice device, Allocator &allocator);
910 void batchCreateAdjust(const DeviceInterface &vkd, const VkDevice device, Allocator &allocator,
911 const VkDeviceSize maxBufferSize);
912 void batchBuild(const DeviceInterface &vk, const VkDevice device, VkCommandBuffer cmdBuffer);
913 void batchBuild(const DeviceInterface &vk, const VkDevice device, VkCommandPool cmdPool, VkQueue queue,
914 qpWatchDog *watchDog);
915 size_t getAllocationCount() const;
916 size_t getAllocationCount(const DeviceInterface &vk, const VkDevice device, const VkDeviceSize maxBufferSize) const;
917 auto getAllocationSizes(const DeviceInterface &vk, // (strBuff, scratchBuff, vertBuff, indexBuff)
918 const VkDevice device) const -> tcu::Vector<VkDeviceSize, 4>;
919
920 protected:
921 uint32_t m_batchStructCount; // default is 4
922 uint32_t m_batchGeomCount; // default is 0, if zero then batchStructCount is used
923 std::vector<BlasInfo> m_infos;
924 std::vector<BlasPtr> m_structs;
925 bool m_createOnce;
926 bool m_tryCachedMemory;
927 VkDeviceSize m_structsBuffSize;
928 VkDeviceSize m_updatesScratchSize;
929 VkDeviceSize m_buildsScratchSize;
930 VkDeviceSize m_verticesSize;
931 VkDeviceSize m_indicesSize;
932
933 protected:
934 struct Impl;
935 Impl *m_impl;
936 };
937
938 struct InstanceData
939 {
InstanceDatavk::InstanceData940 InstanceData(VkTransformMatrixKHR matrix_, uint32_t instanceCustomIndex_, uint32_t mask_,
941 uint32_t instanceShaderBindingTableRecordOffset_, VkGeometryInstanceFlagsKHR flags_)
942 : matrix(matrix_)
943 , instanceCustomIndex(instanceCustomIndex_)
944 , mask(mask_)
945 , instanceShaderBindingTableRecordOffset(instanceShaderBindingTableRecordOffset_)
946 , flags(flags_)
947 {
948 }
949 VkTransformMatrixKHR matrix;
950 uint32_t instanceCustomIndex;
951 uint32_t mask;
952 uint32_t instanceShaderBindingTableRecordOffset;
953 VkGeometryInstanceFlagsKHR flags;
954 };
955
956 class TopLevelAccelerationStructure
957 {
958 public:
959 struct CreationSizes
960 {
961 VkDeviceSize structure;
962 VkDeviceSize updateScratch;
963 VkDeviceSize buildScratch;
964 VkDeviceSize instancePointers;
965 VkDeviceSize instancesBuffer;
966 VkDeviceSize sum() const;
967 };
968
969 static uint32_t getRequiredAllocationCount(void);
970
971 TopLevelAccelerationStructure();
972 TopLevelAccelerationStructure(const TopLevelAccelerationStructure &other) = delete;
973 virtual ~TopLevelAccelerationStructure();
974
975 virtual void setInstanceCount(const size_t instanceCount);
976 virtual void addInstance(de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelStructure,
977 const VkTransformMatrixKHR &matrix = identityMatrix3x4, uint32_t instanceCustomIndex = 0,
978 uint32_t mask = 0xFF, uint32_t instanceShaderBindingTableRecordOffset = 0,
979 VkGeometryInstanceFlagsKHR flags = VkGeometryInstanceFlagBitsKHR(0u));
980
981 virtual void setBuildType(const VkAccelerationStructureBuildTypeKHR buildType) = DE_NULL;
982 virtual void setCreateFlags(const VkAccelerationStructureCreateFlagsKHR createFlags) = DE_NULL;
983 virtual void setCreateGeneric(bool createGeneric) = 0;
984 virtual void setCreationBufferUnbounded(bool creationBufferUnbounded) = 0;
985 virtual void setBuildFlags(const VkBuildAccelerationStructureFlagsKHR buildFlags) = DE_NULL;
986 virtual void setBuildWithoutPrimitives(bool buildWithoutPrimitives) = 0;
987 virtual void setInactiveInstances(bool inactiveInstances) = 0;
988 virtual void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0u) = DE_NULL;
989 virtual void setUseArrayOfPointers(const bool useArrayOfPointers) = DE_NULL;
990 virtual void setIndirectBuildParameters(const VkBuffer indirectBuffer, const VkDeviceSize indirectBufferOffset,
991 const uint32_t indirectBufferStride) = DE_NULL;
992 virtual void setUsePPGeometries(const bool usePPGeometries) = 0;
993 virtual void setTryCachedMemory(const bool tryCachedMemory) = 0;
994 virtual VkBuildAccelerationStructureFlagsKHR getBuildFlags() const = DE_NULL;
995 VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const;
996
997 // methods specific for each acceleration structure
998 virtual void getCreationSizes(const DeviceInterface &vk, const VkDevice device, const VkDeviceSize structureSize,
999 CreationSizes &sizes) = 0;
1000 virtual void create(const DeviceInterface &vk, const VkDevice device, Allocator &allocator,
1001 VkDeviceSize structureSize = 0u, VkDeviceAddress deviceAddress = 0u,
1002 const void *pNext = DE_NULL,
1003 const MemoryRequirement &addMemoryRequirement = MemoryRequirement::Any,
1004 const VkBuffer creationBuffer = VK_NULL_HANDLE,
1005 const VkDeviceSize creationBufferSize = 0u) = DE_NULL;
1006 virtual void build(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1007 TopLevelAccelerationStructure *srcAccelerationStructure = DE_NULL) = DE_NULL;
1008 virtual void copyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1009 TopLevelAccelerationStructure *accelerationStructure, bool compactCopy) = DE_NULL;
1010
1011 virtual void serialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1012 SerialStorage *storage) = DE_NULL;
1013 virtual void deserialize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1014 SerialStorage *storage) = DE_NULL;
1015
1016 virtual std::vector<VkDeviceSize> getSerializingSizes(const DeviceInterface &vk, const VkDevice device,
1017 const VkQueue queue,
1018 const uint32_t queueFamilyIndex) = DE_NULL;
1019
1020 virtual std::vector<uint64_t> getSerializingAddresses(const DeviceInterface &vk,
1021 const VkDevice device) const = DE_NULL;
1022
1023 // helper methods for typical acceleration structure creation tasks
1024 void createAndBuild(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1025 Allocator &allocator, VkDeviceAddress deviceAddress = 0u);
1026 void createAndCopyFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1027 Allocator &allocator, TopLevelAccelerationStructure *accelerationStructure,
1028 VkDeviceSize compactCopySize = 0u, VkDeviceAddress deviceAddress = 0u);
1029 void createAndDeserializeFrom(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1030 Allocator &allocator, SerialStorage *storage, VkDeviceAddress deviceAddress = 0u);
1031
1032 virtual const VkAccelerationStructureKHR *getPtr(void) const = DE_NULL;
1033
1034 virtual void updateInstanceMatrix(const DeviceInterface &vk, const VkDevice device, size_t instanceIndex,
1035 const VkTransformMatrixKHR &matrix) = 0;
1036
1037 protected:
1038 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> m_bottomLevelInstances;
1039 std::vector<InstanceData> m_instanceData;
1040 VkDeviceSize m_structureSize;
1041 VkDeviceSize m_updateScratchSize;
1042 VkDeviceSize m_buildScratchSize;
1043
1044 virtual void createAndDeserializeBottoms(const DeviceInterface &vk, const VkDevice device,
1045 const VkCommandBuffer cmdBuffer, Allocator &allocator,
1046 SerialStorage *storage) = DE_NULL;
1047 };
1048
1049 de::MovePtr<TopLevelAccelerationStructure> makeTopLevelAccelerationStructure();
1050
1051 template <class ASType>
1052 de::MovePtr<ASType> makeAccelerationStructure();
1053 template <>
makeAccelerationStructure()1054 inline de::MovePtr<BottomLevelAccelerationStructure> makeAccelerationStructure()
1055 {
1056 return makeBottomLevelAccelerationStructure();
1057 }
1058 template <>
makeAccelerationStructure()1059 inline de::MovePtr<TopLevelAccelerationStructure> makeAccelerationStructure()
1060 {
1061 return makeTopLevelAccelerationStructure();
1062 }
1063
1064 bool queryAccelerationStructureSize(const DeviceInterface &vk, const VkDevice device, const VkCommandBuffer cmdBuffer,
1065 const std::vector<VkAccelerationStructureKHR> &accelerationStructureHandles,
1066 VkAccelerationStructureBuildTypeKHR buildType, const VkQueryPool queryPool,
1067 VkQueryType queryType, uint32_t firstQuery, std::vector<VkDeviceSize> &results);
1068
1069 class RayTracingPipeline
1070 {
1071 public:
1072 class CompileRequiredError : public std::runtime_error
1073 {
1074 public:
CompileRequiredError(const std::string & error)1075 CompileRequiredError(const std::string &error) : std::runtime_error(error)
1076 {
1077 }
1078 };
1079
1080 RayTracingPipeline();
1081 ~RayTracingPipeline();
1082
1083 void addShader(VkShaderStageFlagBits shaderStage, Move<VkShaderModule> shaderModule, uint32_t group,
1084 const VkSpecializationInfo *specializationInfo = nullptr,
1085 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1086 static_cast<VkPipelineShaderStageCreateFlags>(0),
1087 const void *pipelineShaderStageCreateInfopNext = nullptr);
1088 void addShader(VkShaderStageFlagBits shaderStage, de::SharedPtr<Move<VkShaderModule>> shaderModule, uint32_t group,
1089 const VkSpecializationInfo *specializationInfoPtr = nullptr,
1090 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1091 static_cast<VkPipelineShaderStageCreateFlags>(0),
1092 const void *pipelineShaderStageCreateInfopNext = nullptr);
1093 void addShader(VkShaderStageFlagBits shaderStage, VkShaderModule shaderModule, uint32_t group,
1094 const VkSpecializationInfo *specializationInfo = nullptr,
1095 const VkPipelineShaderStageCreateFlags pipelineShaderStageCreateFlags =
1096 static_cast<VkPipelineShaderStageCreateFlags>(0),
1097 const void *pipelineShaderStageCreateInfopNext = nullptr);
1098 void setGroupCaptureReplayHandle(uint32_t group, const void *pShaderGroupCaptureReplayHandle);
1099 void addLibrary(de::SharedPtr<de::MovePtr<RayTracingPipeline>> pipelineLibrary);
1100 uint32_t getShaderGroupCount(void); // This pipeline only.
1101 uint32_t getFullShaderGroupCount(void); // This pipeline and its included pipeline libraries, recursively.
1102 Move<VkPipeline> createPipeline(const DeviceInterface &vk, const VkDevice device,
1103 const VkPipelineLayout pipelineLayout,
1104 const std::vector<de::SharedPtr<Move<VkPipeline>>> &pipelineLibraries =
1105 std::vector<de::SharedPtr<Move<VkPipeline>>>());
1106 Move<VkPipeline> createPipeline(const DeviceInterface &vk, const VkDevice device,
1107 const VkPipelineLayout pipelineLayout,
1108 const std::vector<VkPipeline> &pipelineLibraries,
1109 const VkPipelineCache pipelineCache);
1110 std::vector<de::SharedPtr<Move<VkPipeline>>> createPipelineWithLibraries(const DeviceInterface &vk,
1111 const VkDevice device,
1112 const VkPipelineLayout pipelineLayout);
1113 std::vector<uint8_t> getShaderGroupHandles(const DeviceInterface &vk, const VkDevice device,
1114 const VkPipeline pipeline, const uint32_t shaderGroupHandleSize,
1115 const uint32_t firstGroup, const uint32_t groupCount) const;
1116 std::vector<uint8_t> getShaderGroupReplayHandles(const DeviceInterface &vk, const VkDevice device,
1117 const VkPipeline pipeline,
1118 const uint32_t shaderGroupHandleReplaySize,
1119 const uint32_t firstGroup, const uint32_t groupCount) const;
1120 de::MovePtr<BufferWithMemory> createShaderBindingTable(
1121 const DeviceInterface &vk, const VkDevice device, const VkPipeline pipeline, Allocator &allocator,
1122 const uint32_t &shaderGroupHandleSize, const uint32_t shaderGroupBaseAlignment, const uint32_t &firstGroup,
1123 const uint32_t &groupCount, const VkBufferCreateFlags &additionalBufferCreateFlags = VkBufferCreateFlags(0u),
1124 const VkBufferUsageFlags &additionalBufferUsageFlags = VkBufferUsageFlags(0u),
1125 const MemoryRequirement &additionalMemoryRequirement = MemoryRequirement::Any,
1126 const VkDeviceAddress &opaqueCaptureAddress = 0u, const uint32_t shaderBindingTableOffset = 0u,
1127 const uint32_t shaderRecordSize = 0u, const void **shaderGroupDataPtrPerGroup = nullptr,
1128 const bool autoAlignRecords = true);
1129 de::MovePtr<BufferWithMemory> createShaderBindingTable(
1130 const DeviceInterface &vk, const VkDevice device, Allocator &allocator, const uint32_t shaderGroupHandleSize,
1131 const uint32_t shaderGroupBaseAlignment, const std::vector<uint8_t> &shaderHandles,
1132 const VkBufferCreateFlags additionalBufferCreateFlags = VkBufferCreateFlags(0u),
1133 const VkBufferUsageFlags additionalBufferUsageFlags = VkBufferUsageFlags(0u),
1134 const MemoryRequirement &additionalMemoryRequirement = MemoryRequirement::Any,
1135 const VkDeviceAddress opaqueCaptureAddress = 0u, const uint32_t shaderBindingTableOffset = 0u,
1136 const uint32_t shaderRecordSize = 0u, const void **shaderGroupDataPtrPerGroup = nullptr,
1137 const bool autoAlignRecords = true);
1138 void setCreateFlags(const VkPipelineCreateFlags &pipelineCreateFlags);
1139 void setCreateFlags2(const VkPipelineCreateFlags2KHR &pipelineCreateFlags2);
1140 void setMaxRecursionDepth(const uint32_t &maxRecursionDepth);
1141 void setMaxPayloadSize(const uint32_t &maxPayloadSize);
1142 void setMaxAttributeSize(const uint32_t &maxAttributeSize);
1143 void setDeferredOperation(const bool deferredOperation, const uint32_t workerThreadCount = 0);
1144 void addDynamicState(const VkDynamicState &dynamicState);
1145
1146 protected:
1147 Move<VkPipeline> createPipelineKHR(const DeviceInterface &vk, const VkDevice device,
1148 const VkPipelineLayout pipelineLayout,
1149 const std::vector<VkPipeline> &pipelineLibraries,
1150 const VkPipelineCache pipelineCache = DE_NULL);
1151
1152 std::vector<de::SharedPtr<Move<VkShaderModule>>> m_shadersModules;
1153 std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> m_pipelineLibraries;
1154 std::vector<VkPipelineShaderStageCreateInfo> m_shaderCreateInfos;
1155 std::vector<VkRayTracingShaderGroupCreateInfoKHR> m_shadersGroupCreateInfos;
1156 VkPipelineCreateFlags m_pipelineCreateFlags;
1157 VkPipelineCreateFlags2KHR m_pipelineCreateFlags2;
1158 uint32_t m_maxRecursionDepth;
1159 uint32_t m_maxPayloadSize;
1160 uint32_t m_maxAttributeSize;
1161 bool m_deferredOperation;
1162 uint32_t m_workerThreadCount;
1163 std::vector<VkDynamicState> m_dynamicStates;
1164 };
1165
1166 class RayTracingProperties
1167 {
1168 protected:
RayTracingProperties()1169 RayTracingProperties()
1170 {
1171 }
1172
1173 public:
RayTracingProperties(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)1174 RayTracingProperties(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
1175 {
1176 DE_UNREF(vki);
1177 DE_UNREF(physicalDevice);
1178 }
~RayTracingProperties()1179 virtual ~RayTracingProperties()
1180 {
1181 }
1182
1183 virtual uint32_t getShaderGroupHandleSize(void) = 0;
1184 virtual uint32_t getShaderGroupHandleAlignment(void) = 0;
1185 virtual uint32_t getShaderGroupHandleCaptureReplaySize(void) = 0;
1186 virtual uint32_t getMaxRecursionDepth(void) = 0;
1187 virtual uint32_t getMaxShaderGroupStride(void) = 0;
1188 virtual uint32_t getShaderGroupBaseAlignment(void) = 0;
1189 virtual uint64_t getMaxGeometryCount(void) = 0;
1190 virtual uint64_t getMaxInstanceCount(void) = 0;
1191 virtual uint64_t getMaxPrimitiveCount(void) = 0;
1192 virtual uint32_t getMaxDescriptorSetAccelerationStructures(void) = 0;
1193 virtual uint32_t getMaxRayDispatchInvocationCount(void) = 0;
1194 virtual uint32_t getMaxRayHitAttributeSize(void) = 0;
1195 virtual uint32_t getMaxMemoryAllocationCount(void) = 0;
1196 };
1197
1198 de::MovePtr<RayTracingProperties> makeRayTracingProperties(const InstanceInterface &vki,
1199 const VkPhysicalDevice physicalDevice);
1200
1201 void cmdTraceRays(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1202 const VkStridedDeviceAddressRegionKHR *raygenShaderBindingTableRegion,
1203 const VkStridedDeviceAddressRegionKHR *missShaderBindingTableRegion,
1204 const VkStridedDeviceAddressRegionKHR *hitShaderBindingTableRegion,
1205 const VkStridedDeviceAddressRegionKHR *callableShaderBindingTableRegion, uint32_t width,
1206 uint32_t height, uint32_t depth);
1207
1208 void cmdTraceRaysIndirect(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1209 const VkStridedDeviceAddressRegionKHR *raygenShaderBindingTableRegion,
1210 const VkStridedDeviceAddressRegionKHR *missShaderBindingTableRegion,
1211 const VkStridedDeviceAddressRegionKHR *hitShaderBindingTableRegion,
1212 const VkStridedDeviceAddressRegionKHR *callableShaderBindingTableRegion,
1213 VkDeviceAddress indirectDeviceAddress);
1214
1215 void cmdTraceRaysIndirect2(const DeviceInterface &vk, VkCommandBuffer commandBuffer,
1216 VkDeviceAddress indirectDeviceAddress);
1217
makeDeviceOrHostAddressConstKHR(const void * hostAddress)1218 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR(const void *hostAddress)
1219 {
1220 // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1221 VkDeviceOrHostAddressConstKHR result;
1222
1223 deMemset(&result, 0, sizeof(result));
1224
1225 result.hostAddress = hostAddress;
1226
1227 return result;
1228 }
1229
makeDeviceOrHostAddressKHR(void * hostAddress)1230 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR(void *hostAddress)
1231 {
1232 // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1233 VkDeviceOrHostAddressKHR result;
1234
1235 deMemset(&result, 0, sizeof(result));
1236
1237 result.hostAddress = hostAddress;
1238
1239 return result;
1240 }
1241
makeDeviceOrHostAddressConstKHR(const DeviceInterface & vk,const VkDevice device,VkBuffer buffer,VkDeviceSize offset)1242 static inline VkDeviceOrHostAddressConstKHR makeDeviceOrHostAddressConstKHR(const DeviceInterface &vk,
1243 const VkDevice device, VkBuffer buffer,
1244 VkDeviceSize offset)
1245 {
1246 // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1247 VkDeviceOrHostAddressConstKHR result;
1248
1249 deMemset(&result, 0, sizeof(result));
1250
1251 VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
1252 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, // VkStructureType sType;
1253 DE_NULL, // const void* pNext;
1254 buffer, // VkBuffer buffer
1255 };
1256 result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
1257
1258 return result;
1259 }
1260
makeDeviceOrHostAddressKHR(const DeviceInterface & vk,const VkDevice device,VkBuffer buffer,VkDeviceSize offset)1261 static inline VkDeviceOrHostAddressKHR makeDeviceOrHostAddressKHR(const DeviceInterface &vk, const VkDevice device,
1262 VkBuffer buffer, VkDeviceSize offset)
1263 {
1264 // VS2015: Cannot create as a const due to cannot assign hostAddress due to it is a second field. Only assigning of first field supported.
1265 VkDeviceOrHostAddressKHR result;
1266
1267 deMemset(&result, 0, sizeof(result));
1268
1269 VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {
1270 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, // VkStructureType sType;
1271 DE_NULL, // const void* pNext;
1272 buffer, // VkBuffer buffer
1273 };
1274 result.deviceAddress = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo) + offset;
1275
1276 return result;
1277 }
1278
/**
 * @brief Pipeline kind selector stored in RayQueryTestParams::pipelineType.
 *
 * Enumerator order (and therefore the underlying values 0..3) must stay as is.
 */
enum class RayQueryShaderSourcePipeline
{
    COMPUTE,          //!< compute pipeline
    GRAPHICS,         //!< graphics pipeline
    RAYTRACING,       //!< ray tracing pipeline
    INVALID_PIPELINE, //!< sentinel: no valid pipeline kind
};
1286
/**
 * @brief Shader stage selector stored in RayQueryTestParams::shaderSourceType.
 *
 * rayQueryRayTracingTestSetup() maps the ray tracing related values
 * (RAY_GENERATION_RT .. CALLABLE) to the set of shader binaries to load.
 * Enumerator order (and therefore the underlying values 0..13) must stay as is.
 */
enum class RayQueryShaderSourceType
{
    VERTEX,
    TESSELLATION_CONTROL,
    TESSELLATION_EVALUATION,
    GEOMETRY,
    FRAGMENT,
    COMPUTE,
    RAY_GENERATION_RT, //!< raygen together with the "*_rt" intersection/any-hit/closest-hit/miss shaders
    RAY_GENERATION,    //!< raygen shader only
    INTERSECTION,
    ANY_HIT,
    CLOSEST_HIT,
    MISS,
    CALLABLE,
    INVALID, //!< sentinel: no valid shader source type
};
1304
1305 struct Ray
1306 {
Rayvk::Ray1307 Ray() : o(0.0f), tmin(0.0f), d(0.0f), tmax(0.0f)
1308 {
1309 }
Rayvk::Ray1310 Ray(const tcu::Vec3 &io, float imin, const tcu::Vec3 &id, float imax) : o(io), tmin(imin), d(id), tmax(imax)
1311 {
1312 }
1313 tcu::Vec3 o;
1314 float tmin;
1315 tcu::Vec3 d;
1316 float tmax;
1317 };
1318
1319 struct RayQueryTestParams
1320 {
1321 uint32_t rayFlags;
1322 std::string name;
1323 std::string shaderFunctions;
1324 std::vector<Ray> rays;
1325 std::vector<std::vector<tcu::Vec3>> verts;
1326 std::vector<std::vector<tcu::Vec3>> aabbs;
1327 bool triangles;
1328 RayQueryShaderSourcePipeline pipelineType;
1329 RayQueryShaderSourceType shaderSourceType;
1330 VkTransformMatrixKHR transform;
1331 };
1332
1333 struct RayQueryTestState
1334 {
RayQueryTestStatevk::RayQueryTestState1335 RayQueryTestState(const vk::DeviceInterface &devInterface, vk::VkDevice dev,
1336 const vk::InstanceInterface &instInterface, vk::VkPhysicalDevice pDevice,
1337 uint32_t uQueueFamilyIndex)
1338 : deviceInterface(devInterface)
1339 , device(dev)
1340 , instanceInterface(instInterface)
1341 , physDevice(pDevice)
1342 , allocator(new SimpleAllocator(deviceInterface, device,
1343 getPhysicalDeviceMemoryProperties(instanceInterface, physDevice)))
1344 , cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1345 uQueueFamilyIndex))
1346 {
1347 pipelineBind = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
1348 }
1349
1350 const vk::DeviceInterface &deviceInterface;
1351 vk::VkDevice device;
1352 const vk::InstanceInterface &instanceInterface;
1353 vk::VkPhysicalDevice physDevice;
1354 const de::UniquePtr<vk::Allocator> allocator;
1355 const Unique<VkCommandPool> cmdPool;
1356 VkPipelineBindPoint pipelineBind;
1357 };
1358
registerRayQueryShaderModule(const DeviceInterface & vkd,const VkDevice device,vk::BinaryCollection & binaryCollection,std::vector<de::SharedPtr<Move<VkShaderModule>>> & shaderModules,std::vector<VkPipelineShaderStageCreateInfo> & shaderCreateInfos,VkShaderStageFlagBits stage,const std::string & name)1359 static inline bool registerRayQueryShaderModule(const DeviceInterface &vkd, const VkDevice device,
1360 vk::BinaryCollection &binaryCollection,
1361 std::vector<de::SharedPtr<Move<VkShaderModule>>> &shaderModules,
1362 std::vector<VkPipelineShaderStageCreateInfo> &shaderCreateInfos,
1363 VkShaderStageFlagBits stage, const std::string &name)
1364 {
1365 if (name.size() == 0)
1366 return false;
1367
1368 shaderModules.push_back(de::SharedPtr<Move<VkShaderModule>>(
1369 new Move<VkShaderModule>(createShaderModule(vkd, device, binaryCollection.get(name), 0))));
1370
1371 shaderCreateInfos.push_back({
1372 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, DE_NULL, (VkPipelineShaderStageCreateFlags)0,
1373 stage, // stage
1374 shaderModules.back()->get(), // shader
1375 "main",
1376 DE_NULL, // pSpecializationInfo
1377 });
1378
1379 return true;
1380 }
1381
initRayQueryAccelerationStructures(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,RayQueryTestParams testParams,VkCommandBuffer cmdBuffer,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomAccelerationStructures,de::SharedPtr<vk::TopLevelAccelerationStructure> & topAccelerationStructure)1382 static inline void initRayQueryAccelerationStructures(
1383 const vk::DeviceInterface &vkd, const vk::VkDevice &device, vk::Allocator &allocator, RayQueryTestParams testParams,
1384 VkCommandBuffer cmdBuffer,
1385 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomAccelerationStructures,
1386 de::SharedPtr<vk::TopLevelAccelerationStructure> &topAccelerationStructure)
1387 {
1388 uint32_t instanceCount = static_cast<uint32_t>(testParams.verts.size());
1389
1390 const uint32_t instancesGroupCount = instanceCount;
1391 de::MovePtr<vk::TopLevelAccelerationStructure> rayQueryTopLevelAccelerationStructure =
1392 makeTopLevelAccelerationStructure();
1393
1394 topAccelerationStructure =
1395 de::SharedPtr<vk::TopLevelAccelerationStructure>(rayQueryTopLevelAccelerationStructure.release());
1396 topAccelerationStructure->setInstanceCount(instancesGroupCount);
1397
1398 for (size_t instanceNdx = 0; instanceNdx < instancesGroupCount; ++instanceNdx)
1399 {
1400 de::MovePtr<BottomLevelAccelerationStructure> rayQueryBottomLevelAccelerationStructure =
1401 makeBottomLevelAccelerationStructure();
1402
1403 bool triangles = testParams.verts[instanceNdx].size() > 0;
1404 uint32_t geometryCount = (triangles) ? static_cast<uint32_t>(testParams.verts[instanceNdx].size()) / 3 :
1405 static_cast<uint32_t>(testParams.aabbs[instanceNdx].size()) / 2;
1406 std::vector<tcu::Vec3> geometryData;
1407
1408 for (size_t geometryNdx = 0; geometryNdx < geometryCount; ++geometryNdx)
1409 {
1410 if (triangles)
1411 {
1412 tcu::Vec3 v0 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 0].x(),
1413 testParams.verts[instanceNdx][geometryNdx * 3 + 0].y(),
1414 testParams.verts[instanceNdx][geometryNdx * 3 + 0].z());
1415 tcu::Vec3 v1 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 1].x(),
1416 testParams.verts[instanceNdx][geometryNdx * 3 + 1].y(),
1417 testParams.verts[instanceNdx][geometryNdx * 3 + 1].z());
1418 tcu::Vec3 v2 = tcu::Vec3(testParams.verts[instanceNdx][geometryNdx * 3 + 2].x(),
1419 testParams.verts[instanceNdx][geometryNdx * 3 + 2].y(),
1420 testParams.verts[instanceNdx][geometryNdx * 3 + 2].z());
1421
1422 geometryData.push_back(v0);
1423 geometryData.push_back(v1);
1424 geometryData.push_back(v2);
1425 }
1426 else
1427 {
1428 tcu::Vec3 v0 = tcu::Vec3(testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].x(),
1429 testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].y(),
1430 testParams.aabbs[instanceNdx][geometryNdx * 2 + 0].z());
1431 tcu::Vec3 v1 = tcu::Vec3(testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].x(),
1432 testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].y(),
1433 testParams.aabbs[instanceNdx][geometryNdx * 2 + 1].z());
1434
1435 geometryData.push_back(v0);
1436 geometryData.push_back(v1);
1437 }
1438 }
1439
1440 rayQueryBottomLevelAccelerationStructure->addGeometry(geometryData, triangles);
1441 rayQueryBottomLevelAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
1442
1443 bottomAccelerationStructures.push_back(
1444 de::SharedPtr<BottomLevelAccelerationStructure>(rayQueryBottomLevelAccelerationStructure.release()));
1445
1446 topAccelerationStructure->addInstance(bottomAccelerationStructures.back());
1447 }
1448
1449 topAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
1450 }
1451
1452 template <typename T>
rayQueryRayTracingTestSetup(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,uint32_t universalQueueFamilyIndex,const RayQueryTestParams params)1453 std::vector<T> rayQueryRayTracingTestSetup(const vk::DeviceInterface &vkd, const vk::VkDevice &device,
1454 vk::Allocator &allocator, const vk::InstanceInterface &instanceInterface,
1455 vk::VkPhysicalDevice physDevice, vk::BinaryCollection &binaryCollection,
1456 vk::VkQueue universalQueue, uint32_t universalQueueFamilyIndex,
1457 const RayQueryTestParams params)
1458 {
1459 RayQueryTestState state(vkd, device, instanceInterface, physDevice, universalQueueFamilyIndex);
1460
1461 vk::Move<VkDescriptorPool> descriptorPool;
1462 vk::Move<VkDescriptorSetLayout> descriptorSetLayout;
1463 vk::Move<VkDescriptorSet> descriptorSet;
1464 vk::Move<VkPipelineLayout> pipelineLayout;
1465 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> rayQueryBottomAccelerationStructures;
1466 de::SharedPtr<TopLevelAccelerationStructure> rayQueryTopAccelerationStructure;
1467 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> traceBottomAccelerationStructures;
1468 de::MovePtr<TopLevelAccelerationStructure> traceAccelerationStructure;
1469
1470 de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR = makeRayTracingProperties(instanceInterface, physDevice);
1471 uint32_t shaderGroupHandleSize = rayTracingPropertiesKHR->getShaderGroupHandleSize();
1472 uint32_t shaderGroupBaseAlignment = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
1473
1474 const VkBufferCreateInfo resultDataCreateInfo =
1475 makeBufferCreateInfo(params.rays.size() * sizeof(T), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1476 de::MovePtr<BufferWithMemory> resultData = de::MovePtr<BufferWithMemory>(
1477 new BufferWithMemory(vkd, device, allocator, resultDataCreateInfo, MemoryRequirement::HostVisible));
1478
1479 const uint32_t AllStages = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
1480 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
1481 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
1482
1483 descriptorSetLayout =
1484 DescriptorSetLayoutBuilder()
1485 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, AllStages)
1486 .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, AllStages)
1487 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, AllStages)
1488 .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, vk::VK_SHADER_STAGE_RAYGEN_BIT_KHR)
1489 .build(vkd, device);
1490 descriptorPool = DescriptorPoolBuilder()
1491 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1492 .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1493 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1494 .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1495 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1496 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
1497
1498 pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
1499
1500 const std::map<RayQueryShaderSourceType, std::vector<std::string>> shaderNames = {
1501 {RayQueryShaderSourceType::RAY_GENERATION_RT, {"rgen", "isect_rt", "ahit_rt", "chit_rt", "miss_rt", ""}},
1502 {RayQueryShaderSourceType::RAY_GENERATION, {"rgen", "", "", "", "", ""}},
1503 {RayQueryShaderSourceType::INTERSECTION, {"rgen", "isect_1", "", "chit", "miss", ""}},
1504 {RayQueryShaderSourceType::ANY_HIT, {"rgen", "isect", "ahit", "", "miss", ""}},
1505 {RayQueryShaderSourceType::CLOSEST_HIT, {"rgen", "isect", "", "chit", "miss", ""}},
1506 {RayQueryShaderSourceType::MISS, {"rgen", "isect", "", "chit", "miss_1", ""}},
1507 {RayQueryShaderSourceType::CALLABLE, {"rgen", "", "", "chit", "miss", "call"}}};
1508
1509 auto shaderNameIt = shaderNames.find(params.shaderSourceType);
1510 if (shaderNameIt == end(shaderNames))
1511 TCU_THROW(InternalError, "Wrong shader source type");
1512
1513 std::vector<VkPipelineShaderStageCreateInfo> shaderCreateInfos;
1514 std::vector<de::SharedPtr<Move<VkShaderModule>>> shaderModules;
1515 bool rgen, isect, ahit, chit, miss, call;
1516
1517 rgen = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1518 VK_SHADER_STAGE_RAYGEN_BIT_KHR, shaderNameIt->second[0]);
1519 isect = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1520 VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderNameIt->second[1]);
1521 ahit = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1522 VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderNameIt->second[2]);
1523 chit = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1524 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderNameIt->second[3]);
1525 miss = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1526 VK_SHADER_STAGE_MISS_BIT_KHR, shaderNameIt->second[4]);
1527 call = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
1528 VK_SHADER_STAGE_CALLABLE_BIT_KHR, shaderNameIt->second[5]);
1529
1530 bool rgenRTTest = rgen && chit && ahit && miss && isect;
1531 bool isectTest = rgen && isect && chit && miss && (shaderNameIt->second[1] == "isect_1");
1532 bool ahitTest = rgen && ahit;
1533 bool chitTest =
1534 rgen && isect && chit && miss && (shaderNameIt->second[4] == "miss") && (shaderNameIt->second[1] == "isect");
1535 bool missTest = rgen && isect && chit && miss && (shaderNameIt->second[4] == "miss_1");
1536 bool callTest = rgen && chit && miss && call;
1537
1538 de::MovePtr<RayTracingPipeline> rt_pipeline = de::newMovePtr<RayTracingPipeline>();
1539
1540 int raygenGroup = 0;
1541 int hitGroup = -1;
1542 int missGroup = -1;
1543 int callableGroup = -1;
1544
1545 rt_pipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, shaderModules[0].get()->get(), raygenGroup);
1546
1547 if (rgenRTTest)
1548 {
1549 hitGroup = 1;
1550 missGroup = 2;
1551 rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1552 rt_pipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1553 rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[3].get()->get(), hitGroup);
1554 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[4].get()->get(), missGroup);
1555 }
1556 else if (ahitTest)
1557 {
1558 hitGroup = 1;
1559 missGroup = 2;
1560 rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1561 rt_pipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1562 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1563 }
1564 else if (missTest)
1565 {
1566 hitGroup = 1;
1567 missGroup = 2;
1568 rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1569 rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1570 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1571 }
1572 else if (chitTest)
1573 {
1574 hitGroup = 1;
1575 missGroup = 2;
1576 rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1577 rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1578 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1579 }
1580 else if (isectTest)
1581 {
1582 hitGroup = 1;
1583 missGroup = 2;
1584 rt_pipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1585 rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[2].get()->get(), hitGroup);
1586 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[3].get()->get(), missGroup);
1587 }
1588 else if (callTest)
1589 {
1590 hitGroup = 1;
1591 missGroup = 2;
1592 callableGroup = 3;
1593 rt_pipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, shaderModules[1].get()->get(), hitGroup);
1594 rt_pipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, shaderModules[2].get()->get(), missGroup);
1595 rt_pipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, shaderModules[3].get()->get(), callableGroup);
1596 }
1597
1598 Move<VkPipeline> pipeline = rt_pipeline->createPipeline(vkd, device, *pipelineLayout);
1599
1600 de::MovePtr<BufferWithMemory> raygenShaderBindingTable = rt_pipeline->createShaderBindingTable(
1601 vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, raygenGroup, 1u);
1602 de::MovePtr<BufferWithMemory> missShaderBindingTable =
1603 missGroup > 0 ?
1604 rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1605 shaderGroupBaseAlignment, missGroup, 1u) :
1606 de::MovePtr<BufferWithMemory>();
1607 de::MovePtr<BufferWithMemory> hitShaderBindingTable =
1608 hitGroup > 0 ?
1609 rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1610 shaderGroupBaseAlignment, hitGroup, 1u) :
1611 de::MovePtr<BufferWithMemory>();
1612 de::MovePtr<BufferWithMemory> callableShaderBindingTable =
1613 callableGroup > 0 ?
1614 rt_pipeline->createShaderBindingTable(vkd, device, *pipeline, *state.allocator, shaderGroupHandleSize,
1615 shaderGroupBaseAlignment, callableGroup, 1u) :
1616 de::MovePtr<BufferWithMemory>();
1617
1618 VkStridedDeviceAddressRegionKHR raygenRegion =
1619 makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*raygenShaderBindingTable).get(), 0),
1620 shaderGroupHandleSize, shaderGroupHandleSize);
1621 VkStridedDeviceAddressRegionKHR missRegion =
1622 missGroup > 0 ?
1623 makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*missShaderBindingTable).get(), 0),
1624 shaderGroupHandleSize, shaderGroupHandleSize) :
1625 VkStridedDeviceAddressRegionKHR{0, 0, 0};
1626 VkStridedDeviceAddressRegionKHR hitRegion =
1627 hitGroup > 0 ?
1628 makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, (*hitShaderBindingTable).get(), 0),
1629 shaderGroupHandleSize, shaderGroupHandleSize) :
1630 VkStridedDeviceAddressRegionKHR{0, 0, 0};
1631 VkStridedDeviceAddressRegionKHR callableRegion =
1632 callableGroup > 0 ? makeStridedDeviceAddressRegionKHR(
1633 getBufferDeviceAddress(vkd, device, (*callableShaderBindingTable).get(), 0),
1634 shaderGroupHandleSize, shaderGroupHandleSize) :
1635 VkStridedDeviceAddressRegionKHR{0, 0, 0};
1636
1637 const Unique<VkCommandBuffer> cmdBuffer(
1638 allocateCommandBuffer(vkd, device, *state.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1639
1640 de::MovePtr<BufferWithMemory> rayBuffer;
1641
1642 if (params.rays.empty() == false)
1643 {
1644 const VkBufferCreateInfo rayBufferCreateInfo =
1645 makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1646 rayBuffer = de::MovePtr<BufferWithMemory>(
1647 new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
1648
1649 memcpy(rayBuffer->getAllocation().getHostPtr(), ¶ms.rays[0], params.rays.size() * sizeof(Ray));
1650 flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
1651 rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
1652 }
1653
1654 beginCommandBuffer(vkd, *cmdBuffer);
1655
1656 // build acceleration structures for ray query
1657 initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer, rayQueryBottomAccelerationStructures,
1658 rayQueryTopAccelerationStructure);
1659 // build acceleration structures for trace
1660 std::vector<tcu::Vec3> geomData;
1661 switch (params.shaderSourceType)
1662 {
1663 case RayQueryShaderSourceType::MISS:
1664 geomData.push_back(tcu::Vec3(0, 0, -1));
1665 geomData.push_back(tcu::Vec3(1, 0, -1));
1666 geomData.push_back(tcu::Vec3(0, 1, -1));
1667 break;
1668 case RayQueryShaderSourceType::CLOSEST_HIT:
1669 case RayQueryShaderSourceType::CALLABLE:
1670 geomData.push_back(tcu::Vec3(0, 0, 1));
1671 geomData.push_back(tcu::Vec3(1, 0, 1));
1672 geomData.push_back(tcu::Vec3(0, 1, 1));
1673 break;
1674 case RayQueryShaderSourceType::ANY_HIT:
1675 case RayQueryShaderSourceType::INTERSECTION:
1676 geomData.push_back(tcu::Vec3(0, 0, 1));
1677 geomData.push_back(tcu::Vec3(0.5, 0.5, 1));
1678 break;
1679 default:
1680 break;
1681 }
1682
1683 VkDescriptorBufferInfo resultBufferDesc = {(*resultData).get(), 0, VK_WHOLE_SIZE};
1684 VkDescriptorBufferInfo rayBufferDesc = {(*rayBuffer).get(), 0, VK_WHOLE_SIZE};
1685
1686 const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr =
1687 rayQueryTopAccelerationStructure.get();
1688 VkWriteDescriptorSetAccelerationStructureKHR rayQueryAccelerationStructureWriteDescriptorSet = {
1689 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
1690 DE_NULL, // const void* pNext;
1691 1u, // uint32_t accelerationStructureCount;
1692 rayQueryTopLevelAccelerationStructurePtr
1693 ->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
1694 };
1695
1696 VkWriteDescriptorSetAccelerationStructureKHR traceAccelerationStructureWriteDescriptorSet = {};
1697 if (geomData.size() > 0)
1698 {
1699 traceAccelerationStructure = makeTopLevelAccelerationStructure();
1700 traceAccelerationStructure->setInstanceCount(1);
1701
1702 de::MovePtr<BottomLevelAccelerationStructure> traceBottomLevelAccelerationStructure =
1703 makeBottomLevelAccelerationStructure();
1704
1705 traceBottomLevelAccelerationStructure->addGeometry(geomData, ((geomData.size() % 3) == 0), 0);
1706 traceBottomLevelAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
1707 traceBottomAccelerationStructures.push_back(
1708 de::SharedPtr<BottomLevelAccelerationStructure>(traceBottomLevelAccelerationStructure.release()));
1709 traceAccelerationStructure->addInstance(traceBottomAccelerationStructures.back(), identityMatrix3x4, 0, 255U, 0,
1710 0);
1711 traceAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
1712
1713 const TopLevelAccelerationStructure *traceTopLevelAccelerationStructurePtr = traceAccelerationStructure.get();
1714 traceAccelerationStructureWriteDescriptorSet = {
1715 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
1716 DE_NULL, // const void* pNext;
1717 1u, // uint32_t accelerationStructureCount;
1718 traceTopLevelAccelerationStructurePtr
1719 ->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
1720 };
1721
1722 DescriptorSetUpdateBuilder()
1723 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1724 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1725 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1726 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1727 &rayQueryAccelerationStructureWriteDescriptorSet)
1728 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1729 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1730 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(3u),
1731 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &traceAccelerationStructureWriteDescriptorSet)
1732 .update(vkd, device);
1733 }
1734 else
1735 {
1736 DescriptorSetUpdateBuilder()
1737 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1738 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1739 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1740 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1741 &rayQueryAccelerationStructureWriteDescriptorSet)
1742 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1743 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1744 .update(vkd, device);
1745 }
1746
1747 VkDescriptorSet setHandle = descriptorSet.get();
1748
1749 vkd.cmdBindPipeline(*cmdBuffer, state.pipelineBind, *pipeline);
1750 vkd.cmdBindDescriptorSets(*cmdBuffer, state.pipelineBind, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
1751
1752 cmdTraceRays(vkd, *cmdBuffer, &raygenRegion, &missRegion, &hitRegion, &callableRegion,
1753 static_cast<uint32_t>(params.rays.size()), 1, 1);
1754
1755 endCommandBuffer(vkd, *cmdBuffer);
1756
1757 submitCommandsAndWait(vkd, device, universalQueue, *cmdBuffer);
1758
1759 invalidateMappedMemoryRange(vkd, device, resultData->getAllocation().getMemory(),
1760 resultData->getAllocation().getOffset(), VK_WHOLE_SIZE);
1761
1762 std::vector<T> results(params.rays.size());
1763 memcpy(&results[0], resultData->getAllocation().getHostPtr(), sizeof(T) * params.rays.size());
1764
1765 rayQueryBottomAccelerationStructures.clear();
1766 rayQueryTopAccelerationStructure.clear();
1767 traceBottomAccelerationStructures.clear();
1768 traceAccelerationStructure.clear();
1769
1770 return results;
1771 }
1772
1773 template <typename T>
rayQueryComputeTestSetup(const vk::DeviceInterface & vkd,const vk::VkDevice & device,vk::Allocator & allocator,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,uint32_t universalQueueFamilyIndex,RayQueryTestParams params)1774 std::vector<T> rayQueryComputeTestSetup(const vk::DeviceInterface &vkd, const vk::VkDevice &device,
1775 vk::Allocator &allocator, const vk::InstanceInterface &instanceInterface,
1776 vk::VkPhysicalDevice physDevice, vk::BinaryCollection &binaryCollection,
1777 vk::VkQueue universalQueue, uint32_t universalQueueFamilyIndex,
1778 RayQueryTestParams params)
1779 {
1780 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomAccelerationStructures;
1781 de::SharedPtr<TopLevelAccelerationStructure> topAccelerationStructure;
1782
1783 RayQueryTestState state(vkd, device, instanceInterface, physDevice, universalQueueFamilyIndex);
1784
1785 const DeviceInterface &vk = vkd;
1786
1787 int power = static_cast<int>(ceil(log2(params.rays.size())));
1788 power = (power % 2 == 0) ? power : power + 1;
1789 const int sz = de::max<int>(static_cast<int>(pow(2, power)), 64);
1790 Ray ray = Ray();
1791
1792 for (int idx = static_cast<int>(params.rays.size()); idx < sz; ++idx)
1793 {
1794 params.rays.push_back(ray);
1795 }
1796
1797 const VkBufferCreateInfo resultDataCreateInfo =
1798 makeBufferCreateInfo(params.rays.size() * sizeof(T), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1799 de::MovePtr<BufferWithMemory> resultData = de::MovePtr<BufferWithMemory>(
1800 new BufferWithMemory(vkd, device, allocator, resultDataCreateInfo, MemoryRequirement::HostVisible));
1801
1802 const Move<VkDescriptorSetLayout> descriptorSetLayout =
1803 DescriptorSetLayoutBuilder()
1804 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1805 .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_COMPUTE_BIT)
1806 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1807 .build(vk, device);
1808 const Move<VkDescriptorPool> descriptorPool =
1809 DescriptorPoolBuilder()
1810 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1811 .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1812 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1813 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1814 const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
1815 const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, descriptorSetLayout.get());
1816
1817 const Unique<VkShaderModule> rayQueryModule(createShaderModule(vkd, device, binaryCollection.get("comp"), 0u));
1818
1819 const VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
1820 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1821 DE_NULL, // const void* pNext;
1822 static_cast<VkPipelineShaderStageCreateFlags>(0u), // VkPipelineShaderStageCreateFlags flags;
1823 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
1824 *rayQueryModule, // VkShaderModule module;
1825 "main", // const char* pName;
1826 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
1827 };
1828 const VkComputePipelineCreateInfo pipelineCreateInfo = {
1829 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
1830 DE_NULL, // const void* pNext;
1831 static_cast<VkPipelineCreateFlags>(0u), // VkPipelineCreateFlags flags;
1832 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
1833 *pipelineLayout, // VkPipelineLayout layout;
1834 DE_NULL, // VkPipeline basePipelineHandle;
1835 0, // int32_t basePipelineIndex;
1836 };
1837 Move<VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
1838
1839 const Unique<VkCommandBuffer> cmdBuffer(
1840 allocateCommandBuffer(vk, device, *state.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1841
1842 de::MovePtr<BufferWithMemory> rayBuffer;
1843
1844 if (params.rays.empty() == false)
1845 {
1846 const VkBufferCreateInfo rayBufferCreateInfo =
1847 makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1848 rayBuffer = de::MovePtr<BufferWithMemory>(
1849 new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
1850
1851 memcpy(rayBuffer->getAllocation().getHostPtr(), ¶ms.rays[0], params.rays.size() * sizeof(Ray));
1852 flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
1853 rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
1854 }
1855
1856 beginCommandBuffer(vk, *cmdBuffer);
1857
1858 // build acceleration structures for ray query
1859 initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer, bottomAccelerationStructures,
1860 topAccelerationStructure);
1861
1862 const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr = topAccelerationStructure.get();
1863 VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
1864 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
1865 DE_NULL, // const void* pNext;
1866 1u, // uint32_t accelerationStructureCount;
1867 rayQueryTopLevelAccelerationStructurePtr
1868 ->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
1869 };
1870
1871 VkDescriptorBufferInfo resultBufferDesc = {(*resultData).get(), 0, VK_WHOLE_SIZE};
1872 VkDescriptorBufferInfo rayBufferDesc = {(*rayBuffer).get(), 0, VK_WHOLE_SIZE};
1873
1874 DescriptorSetUpdateBuilder()
1875 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
1876 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultBufferDesc)
1877 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
1878 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
1879 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
1880 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDesc)
1881 .update(vk, device);
1882
1883 VkDescriptorSet setHandle = descriptorSet.get();
1884
1885 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
1886
1887 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &setHandle, 0, DE_NULL);
1888
1889 vk.cmdDispatch(*cmdBuffer, static_cast<uint32_t>(params.rays.size()), 1, 1);
1890
1891 endCommandBuffer(vk, *cmdBuffer);
1892
1893 submitCommandsAndWait(vk, device, universalQueue, *cmdBuffer);
1894
1895 invalidateMappedMemoryRange(vk, device, resultData->getAllocation().getMemory(),
1896 resultData->getAllocation().getOffset(), VK_WHOLE_SIZE);
1897
1898 std::vector<T> results(params.rays.size());
1899
1900 memcpy(&results[0], resultData->getAllocation().getHostPtr(), sizeof(T) * params.rays.size());
1901
1902 topAccelerationStructure.clear();
1903 bottomAccelerationStructures.clear();
1904
1905 return results;
1906 }
1907
1908 template <typename T>
rayQueryGraphicsTestSetup(const DeviceInterface & vkd,const VkDevice device,const uint32_t queueFamilyIndex,Allocator & allocator,vk::BinaryCollection & binaryCollection,vk::VkQueue universalQueue,const vk::InstanceInterface & instanceInterface,vk::VkPhysicalDevice physDevice,RayQueryTestParams params)1909 static std::vector<T> rayQueryGraphicsTestSetup(const DeviceInterface &vkd, const VkDevice device,
1910 const uint32_t queueFamilyIndex, Allocator &allocator,
1911 vk::BinaryCollection &binaryCollection, vk::VkQueue universalQueue,
1912 const vk::InstanceInterface &instanceInterface,
1913 vk::VkPhysicalDevice physDevice, RayQueryTestParams params)
1914 {
1915 int width = static_cast<int>(params.rays.size());
1916 int power = static_cast<int>(ceil(log2(width)));
1917 power = (power % 2 == 0) ? power : power + 1;
1918 int sz = static_cast<int>(pow(2, power / 2));
1919
1920 Ray ray = Ray();
1921 const int totalSz = sz * sz;
1922
1923 for (int idx = static_cast<int>(params.rays.size()); idx < totalSz; ++idx)
1924 {
1925 params.rays.push_back(ray);
1926 }
1927
1928 const tcu::UVec2 renderSz = {static_cast<uint32_t>(sz), static_cast<uint32_t>(sz)};
1929
1930 Move<VkDescriptorSetLayout> descriptorSetLayout;
1931 Move<VkDescriptorPool> descriptorPool;
1932 Move<VkDescriptorSet> descriptorSet;
1933 Move<VkPipelineLayout> pipelineLayout;
1934 Move<VkRenderPass> renderPass;
1935 Move<VkFramebuffer> framebuffer;
1936 Move<VkPipeline> pipeline;
1937 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> rayQueryBottomAccelerationStructures;
1938 de::SharedPtr<TopLevelAccelerationStructure> rayQueryTopAccelerationStructure;
1939
1940 descriptorSetLayout =
1941 DescriptorSetLayoutBuilder()
1942 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_ALL_GRAPHICS)
1943 .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_ALL_GRAPHICS)
1944 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL_GRAPHICS)
1945 .build(vkd, device);
1946 descriptorPool = DescriptorPoolBuilder()
1947 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1948 .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1949 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1950 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1951 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
1952 pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
1953
1954 const std::map<RayQueryShaderSourceType, std::vector<std::string>> shaderNames = {
1955 //idx: 0 1 2 3 4
1956 //shader: vert, tesc, tese, geom, frag,
1957 {RayQueryShaderSourceType::VERTEX,
1958 {
1959 "vert",
1960 "",
1961 "",
1962 "",
1963 "",
1964 }},
1965 {RayQueryShaderSourceType::TESSELLATION_CONTROL,
1966 {
1967 "vert",
1968 "tesc",
1969 "tese",
1970 "",
1971 "",
1972 }},
1973 {RayQueryShaderSourceType::TESSELLATION_EVALUATION,
1974 {
1975 "vert",
1976 "tesc",
1977 "tese",
1978 "",
1979 "",
1980 }},
1981 {RayQueryShaderSourceType::GEOMETRY,
1982 {
1983 "vert",
1984 "",
1985 "",
1986 "geom",
1987 "",
1988 }},
1989 {RayQueryShaderSourceType::FRAGMENT,
1990 {
1991 "vert",
1992 "",
1993 "",
1994 "",
1995 "frag",
1996 }},
1997 };
1998
1999 auto shaderNameIt = shaderNames.find(params.shaderSourceType);
2000 if (shaderNameIt == end(shaderNames))
2001 TCU_THROW(InternalError, "Wrong shader source type");
2002
2003 std::vector<VkPipelineShaderStageCreateInfo> shaderCreateInfos;
2004 std::vector<de::SharedPtr<Move<VkShaderModule>>> shaderModules;
2005 bool tescX, teseX, fragX;
2006 registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2007 VK_SHADER_STAGE_VERTEX_BIT, shaderNameIt->second[0]);
2008 tescX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2009 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, shaderNameIt->second[1]);
2010 teseX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2011 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, shaderNameIt->second[2]);
2012 registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2013 VK_SHADER_STAGE_GEOMETRY_BIT, shaderNameIt->second[3]);
2014 fragX = registerRayQueryShaderModule(vkd, device, binaryCollection, shaderModules, shaderCreateInfos,
2015 VK_SHADER_STAGE_FRAGMENT_BIT, shaderNameIt->second[4]);
2016
2017 const vk::VkSubpassDescription subpassDesc = {
2018 (vk::VkSubpassDescriptionFlags)0,
2019 vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
2020 0u, // inputCount
2021 DE_NULL, // pInputAttachments
2022 0u, // colorCount
2023 DE_NULL, // pColorAttachments
2024 DE_NULL, // pResolveAttachments
2025 DE_NULL, // depthStencilAttachment
2026 0u, // preserveCount
2027 DE_NULL, // pPreserveAttachments
2028 };
2029 const vk::VkRenderPassCreateInfo renderPassParams = {
2030 vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
2031 DE_NULL, // pNext
2032 (vk::VkRenderPassCreateFlags)0,
2033 0u, // attachmentCount
2034 DE_NULL, // pAttachments
2035 1u, // subpassCount
2036 &subpassDesc, // pSubpasses
2037 0u, // dependencyCount
2038 DE_NULL, // pDependencies
2039 };
2040
2041 renderPass = createRenderPass(vkd, device, &renderPassParams);
2042
2043 const vk::VkFramebufferCreateInfo framebufferParams = {
2044 vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
2045 DE_NULL, // pNext
2046 (vk::VkFramebufferCreateFlags)0,
2047 *renderPass, // renderPass
2048 0u, // attachmentCount
2049 DE_NULL, // pAttachments
2050 renderSz[0], // width
2051 renderSz[1], // height
2052 1u, // layers
2053 };
2054
2055 framebuffer = createFramebuffer(vkd, device, &framebufferParams);
2056
2057 VkPrimitiveTopology testTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
2058 std::vector<tcu::Vec3> vertices;
2059
2060 switch (params.shaderSourceType)
2061 {
2062 case RayQueryShaderSourceType::TESSELLATION_CONTROL:
2063 case RayQueryShaderSourceType::TESSELLATION_EVALUATION:
2064 case RayQueryShaderSourceType::VERTEX:
2065 case RayQueryShaderSourceType::GEOMETRY:
2066 {
2067 if ((params.shaderSourceType == RayQueryShaderSourceType::VERTEX) ||
2068 (params.shaderSourceType == RayQueryShaderSourceType::GEOMETRY))
2069 {
2070 testTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
2071 }
2072 else
2073 {
2074 testTopology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
2075 }
2076 const int numTriangles = static_cast<int>(params.rays.size());
2077 const float halfStepSz = 1.f / (static_cast<float>(numTriangles) * 2.f);
2078 float startX = 0.0;
2079 for (int index = 0; index < numTriangles; ++index)
2080 {
2081 vertices.push_back(tcu::Vec3(startX, 0.0, static_cast<float>(index)));
2082 startX += halfStepSz;
2083 vertices.push_back(tcu::Vec3(startX, 1.0, static_cast<float>(index)));
2084 startX += halfStepSz;
2085 vertices.push_back(tcu::Vec3(startX, 0.0, static_cast<float>(index)));
2086 }
2087 break;
2088 }
2089 case RayQueryShaderSourceType::FRAGMENT:
2090 vertices.push_back(tcu::Vec3(-1.0f, -1.0f, 0.0f));
2091 vertices.push_back(tcu::Vec3(1.0f, -1.0f, 0.0f));
2092 vertices.push_back(tcu::Vec3(-1.0f, 1.0f, 0.0f));
2093 vertices.push_back(tcu::Vec3(1.0f, 1.0f, 0.0f));
2094 break;
2095 default:
2096 TCU_THROW(InternalError, "Wrong shader source type");
2097 };
2098
2099 const VkVertexInputBindingDescription vertexInputBindingDescription = {
2100 0u, // uint32_t binding;
2101 sizeof(tcu::Vec3), // uint32_t stride;
2102 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate;
2103 };
2104
2105 const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
2106 0u, // uint32_t location;
2107 0u, // uint32_t binding;
2108 VK_FORMAT_R32G32B32_SFLOAT, // VkFormat format;
2109 0u, // uint32_t offset;
2110 };
2111
2112 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
2113 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
2114 DE_NULL, // const void* pNext;
2115 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
2116 1u, // uint32_t vertexBindingDescriptionCount;
2117 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
2118 1u, // uint32_t vertexAttributeDescriptionCount;
2119 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
2120 };
2121
2122 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
2123 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
2124 DE_NULL, // const void* pNext;
2125 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
2126 testTopology, // VkPrimitiveTopology topology;
2127 VK_FALSE // VkBool32 primitiveRestartEnable;
2128 };
2129
2130 const VkPipelineTessellationStateCreateInfo tessellationStateCreateInfo = {
2131 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType;
2132 DE_NULL, // const void* pNext;
2133 VkPipelineTessellationStateCreateFlags(0u), // VkPipelineTessellationStateCreateFlags flags;
2134 3u // uint32_t patchControlPoints;
2135 };
2136
2137 VkViewport viewport = makeViewport(renderSz[0], renderSz[1]);
2138 VkRect2D scissor = makeRect2D(renderSz[0], renderSz[1]);
2139
2140 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
2141 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
2142 DE_NULL, // const void* pNext
2143 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
2144 1u, // uint32_t viewportCount
2145 &viewport, // const VkViewport* pViewports
2146 1u, // uint32_t scissorCount
2147 &scissor // const VkRect2D* pScissors
2148 };
2149
2150 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
2151 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
2152 DE_NULL, // const void* pNext;
2153 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
2154 VK_FALSE, // VkBool32 depthClampEnable;
2155 fragX ? VK_FALSE : VK_TRUE, // VkBool32 rasterizerDiscardEnable;
2156 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
2157 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
2158 VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
2159 VK_FALSE, // VkBool32 depthBiasEnable;
2160 0.0f, // float depthBiasConstantFactor;
2161 0.0f, // float depthBiasClamp;
2162 0.0f, // float depthBiasSlopeFactor;
2163 1.0f // float lineWidth;
2164 };
2165
2166 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = {
2167 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType;
2168 DE_NULL, // const void* pNext;
2169 (VkPipelineMultisampleStateCreateFlags)0, // VkPipelineMultisampleStateCreateFlags flags;
2170 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples;
2171 VK_FALSE, // VkBool32 sampleShadingEnable;
2172 0.0f, // float minSampleShading;
2173 DE_NULL, // const VkSampleMask* pSampleMask;
2174 VK_FALSE, // VkBool32 alphaToCoverageEnable;
2175 VK_FALSE // VkBool32 alphaToOneEnable;
2176 };
2177
2178 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
2179 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
2180 DE_NULL, // const void* pNext;
2181 (VkPipelineColorBlendStateCreateFlags)0, // VkPipelineColorBlendStateCreateFlags flags;
2182 false, // VkBool32 logicOpEnable;
2183 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp;
2184 0, // uint32_t attachmentCount;
2185 DE_NULL, // const VkPipelineColorBlendAttachmentState* pAttachments;
2186 {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4];
2187 };
2188
2189 const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = {
2190 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
2191 DE_NULL, // const void* pNext;
2192 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
2193 static_cast<uint32_t>(shaderCreateInfos.size()), // uint32_t stageCount;
2194 shaderCreateInfos.data(), // const VkPipelineShaderStageCreateInfo* pStages;
2195 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
2196 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
2197 (tescX || teseX) ? &tessellationStateCreateInfo :
2198 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
2199 fragX ? &viewportStateCreateInfo : DE_NULL, // const VkPipelineViewportStateCreateInfo* pViewportState;
2200 &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
2201 fragX ? &multisampleStateCreateInfo : DE_NULL, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
2202 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
2203 fragX ? &colorBlendStateCreateInfo : DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
2204 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
2205 pipelineLayout.get(), // VkPipelineLayout layout;
2206 renderPass.get(), // VkRenderPass renderPass;
2207 0u, // uint32_t subpass;
2208 DE_NULL, // VkPipeline basePipelineHandle;
2209 0 // int basePipelineIndex;
2210 };
2211
2212 pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &graphicsPipelineCreateInfo);
2213
2214 const VkBufferCreateInfo vertexBufferParams = {
2215 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2216 DE_NULL, // const void* pNext;
2217 0u, // VkBufferCreateFlags flags;
2218 VkDeviceSize(sizeof(tcu::Vec3) * vertices.size()), // VkDeviceSize size;
2219 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
2220 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2221 1u, // uint32_t queueFamilyIndexCount;
2222 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2223 };
2224
2225 Move<VkBuffer> vertexBuffer;
2226 de::MovePtr<Allocation> vertexAlloc;
2227
2228 vertexBuffer = createBuffer(vkd, device, &vertexBufferParams);
2229 vertexAlloc =
2230 allocator.allocate(getBufferMemoryRequirements(vkd, device, *vertexBuffer), MemoryRequirement::HostVisible);
2231 VK_CHECK(vkd.bindBufferMemory(device, *vertexBuffer, vertexAlloc->getMemory(), vertexAlloc->getOffset()));
2232
2233 // Upload vertex data
2234 deMemcpy(vertexAlloc->getHostPtr(), vertices.data(), vertices.size() * sizeof(tcu::Vec3));
2235 flushAlloc(vkd, device, *vertexAlloc);
2236
2237 RayQueryTestState state(vkd, device, instanceInterface, physDevice, queueFamilyIndex);
2238
2239 de::MovePtr<BufferWithMemory> rayBuffer;
2240
2241 if (params.rays.empty() == false)
2242 {
2243 const VkBufferCreateInfo rayBufferCreateInfo =
2244 makeBufferCreateInfo(params.rays.size() * sizeof(Ray), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2245 rayBuffer = de::MovePtr<BufferWithMemory>(
2246 new BufferWithMemory(vkd, device, allocator, rayBufferCreateInfo, MemoryRequirement::HostVisible));
2247
2248 memcpy(rayBuffer->getAllocation().getHostPtr(), ¶ms.rays[0], params.rays.size() * sizeof(Ray));
2249 flushMappedMemoryRange(vkd, device, rayBuffer->getAllocation().getMemory(),
2250 rayBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
2251 }
2252
2253 const VkQueue queue = universalQueue;
2254 const VkFormat imageFormat = VK_FORMAT_R32G32B32A32_SFLOAT;
2255 const VkImageCreateInfo imageCreateInfo = {
2256 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2257 DE_NULL, // const void* pNext;
2258 (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
2259 VK_IMAGE_TYPE_3D, // VkImageType imageType;
2260 imageFormat, // VkFormat format;
2261 makeExtent3D(renderSz[0], renderSz[1], 1), // VkExtent3D extent;
2262 1u, // uint32_t mipLevels;
2263 1u, // uint32_t arrayLayers;
2264 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2265 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2266 VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2267 VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
2268 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2269 0u, // uint32_t queueFamilyIndexCount;
2270 DE_NULL, // const uint32_t* pQueueFamilyIndices;
2271 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
2272 };
2273 const VkImageSubresourceRange imageSubresourceRange =
2274 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2275 const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
2276 new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
2277 const Move<VkImageView> imageView =
2278 makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_3D, imageFormat, imageSubresourceRange);
2279
2280 const VkBufferCreateInfo resultBufferCreateInfo =
2281 makeBufferCreateInfo(renderSz[0] * renderSz[1] * 1 * 4 * sizeof(float), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2282 const VkImageSubresourceLayers resultBufferImageSubresourceLayers =
2283 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2284 const VkBufferImageCopy resultBufferImageRegion =
2285 makeBufferImageCopy(makeExtent3D(renderSz[0], renderSz[1], 1), resultBufferImageSubresourceLayers);
2286 de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
2287 new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2288
2289 const VkDescriptorImageInfo resultImageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2290
2291 const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
2292 const Move<VkCommandBuffer> cmdBuffer =
2293 allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2294
2295 const VkDescriptorBufferInfo rayBufferDescriptorInfo =
2296 makeDescriptorBufferInfo((*rayBuffer).get(), 0, VK_WHOLE_SIZE);
2297
2298 beginCommandBuffer(vkd, *cmdBuffer, 0u);
2299 {
2300 const VkImageMemoryBarrier preImageBarrier =
2301 makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
2302 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
2303 cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2304 VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
2305
2306 const VkClearValue clearValue = makeClearValueColorU32(0xFF, 0u, 0u, 0u);
2307 vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
2308 &imageSubresourceRange);
2309
2310 const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
2311 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
2312 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
2313 cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, SHADER_STAGE_ALL_RAY_TRACING,
2314 &postImageBarrier);
2315
2316 // build acceleration structures for ray query
2317 initRayQueryAccelerationStructures(vkd, device, allocator, params, *cmdBuffer,
2318 rayQueryBottomAccelerationStructures, rayQueryTopAccelerationStructure);
2319
2320 const TopLevelAccelerationStructure *rayQueryTopLevelAccelerationStructurePtr =
2321 rayQueryTopAccelerationStructure.get();
2322 VkWriteDescriptorSetAccelerationStructureKHR rayQueryAccelerationStructureWriteDescriptorSet = {
2323 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
2324 DE_NULL, // const void* pNext;
2325 1u, // uint32_t accelerationStructureCount;
2326 rayQueryTopLevelAccelerationStructurePtr
2327 ->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
2328 };
2329
2330 DescriptorSetUpdateBuilder()
2331 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
2332 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &resultImageInfo)
2333 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
2334 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
2335 &rayQueryAccelerationStructureWriteDescriptorSet)
2336 .writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(2u),
2337 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &rayBufferDescriptorInfo)
2338 .update(vkd, device);
2339
2340 const VkRenderPassBeginInfo renderPassBeginInfo = {
2341 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // VkStructureType sType;
2342 DE_NULL, // const void* pNext;
2343 renderPass.get(), // VkRenderPass renderPass;
2344 framebuffer.get(), // VkFramebuffer framebuffer;
2345 makeRect2D(renderSz[0], renderSz[1]), // VkRect2D renderArea;
2346 0u, // uint32_t clearValueCount;
2347 DE_NULL // const VkClearValue* pClearValues;
2348 };
2349 VkDeviceSize vertexBufferOffset = 0u;
2350
2351 vkd.cmdBeginRenderPass(*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
2352 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2353 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
2354 &descriptorSet.get(), 0u, DE_NULL);
2355 vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer.get(), &vertexBufferOffset);
2356 vkd.cmdDraw(*cmdBuffer, uint32_t(vertices.size()), 1, 0, 0);
2357 vkd.cmdEndRenderPass(*cmdBuffer);
2358
2359 const VkMemoryBarrier postTestMemoryBarrier =
2360 makeMemoryBarrier(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2361 cmdPipelineMemoryBarrier(vkd, *cmdBuffer, SHADER_STAGE_ALL_RAY_TRACING, VK_PIPELINE_STAGE_TRANSFER_BIT,
2362 &postTestMemoryBarrier);
2363
2364 vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u,
2365 &resultBufferImageRegion);
2366 }
2367 endCommandBuffer(vkd, *cmdBuffer);
2368
2369 submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2370
2371 invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(),
2372 resultBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
2373
2374 rayQueryBottomAccelerationStructures.clear();
2375 rayQueryTopAccelerationStructure.clear();
2376
2377 std::vector<T> results;
2378 const uint32_t depth = 1;
2379
2380 // view the copied-back result buffer through a pixel access so texels can be read on the host
2381 tcu::TextureFormat imageFormatMapped = vk::mapVkFormat(imageFormat);
2382 tcu::ConstPixelBufferAccess resultAccess(imageFormatMapped, renderSz[0], renderSz[1], depth,
2383 resultBuffer->getAllocation().getHostPtr());
2384
2385 for (uint32_t z = 0; z < depth; z++)
2386 {
2387 for (uint32_t y = 0; y < renderSz[1]; y++)
2388 {
2389 for (uint32_t x = 0; x < renderSz[0]; x++)
2390 {
2391 tcu::Vec4 pixel = resultAccess.getPixel(x, y, z);
2392 T resData = {pixel[0], pixel[1], pixel[2], pixel[3]};
2393 results.push_back(resData);
2394 if (results.size() >= params.rays.size())
2395 {
2396 return (results);
2397 }
2398 }
2399 }
2400 }
2401
2402 return results;
2403 }
2404
// Declaration only; no body is provided at this point in the header.
// programCollection is taken by non-const reference, while params and
// rayQueryPart are taken by value, so the callee receives its own copies.
// NOTE(review): presumably adds the ray query test shaders to
// programCollection, with rayQueryPart spliced into the generated source and
// max_t used as the ray's maximum distance -- confirm against the definition.
2405 void generateRayQueryShaders(SourceCollections &programCollection, RayQueryTestParams params, std::string rayQueryPart,
2406 float max_t);
2407
2408 #else
2409
// Declared only in the #else branch of the CTS_USES_VULKANSC guard, i.e. when
// CTS_USES_VULKANSC is defined and the ray tracing utilities are compiled out.
// NOTE(review): looks like a placeholder so the matching source file still
// defines a symbol in Vulkan SC builds -- confirm against the definition.
2410 uint32_t rayTracingDefineAnything();
2411
2412 #endif // CTS_USES_VULKANSC
2413
2414 } // namespace vk
2415
2416 #endif // _VKRAYTRACINGUTIL_HPP
2417