1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Vulkan Performance Query Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26 
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "deMath.h"
40 #include "deRandom.hpp"
41 
42 #include "tcuTestLog.hpp"
43 #include "tcuResource.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "vkImageUtil.hpp"
46 #include "tcuCommandLine.hpp"
47 #include "tcuRGBA.hpp"
48 
49 #include <algorithm>
50 #include <iterator>
51 
52 namespace vkt
53 {
54 namespace QueryPool
55 {
56 namespace
57 {
58 
59 using namespace vk;
60 using namespace Draw;
61 
62 std::string uuidToHex(const uint8_t uuid[])
63 {
64     const size_t bytesPerPart[] = {4, 2, 2, 2, 6};
65     const uint8_t *ptr          = &uuid[0];
66     const size_t stringSize     = VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
67     std::string result;
68 
69     result.reserve(stringSize);
70 
71     for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
72     {
73         const size_t bytesInPart   = bytesPerPart[partNdx];
74         const size_t symbolsInPart = 2 * bytesInPart;
75         uint64_t part              = 0;
76         std::string partString;
77 
78         for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
79         {
80             part = (part << 8) | *ptr;
81             ++ptr;
82         }
83 
84         partString = tcu::toHex(part).toString();
85 
86         DE_ASSERT(partString.size() > symbolsInPart);
87 
88         result +=
89             (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
90 
91         if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
92             result += '-';
93     }
94 
95     DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
96     DE_ASSERT(result.size() == stringSize);
97 
98     return result;
99 }
100 
101 // Helper class to acquire and release the profiling lock in an orderly manner.
102 // If an exception is thrown from a test (e.g. from VK_CHECK), the profiling lock is still released.
103 class ProfilingLockGuard
104 {
105 public:
106     ProfilingLockGuard(const DeviceInterface &vkd, const VkDevice device) : m_vkd(vkd), m_device(device)
107     {
108         const auto timeout                           = std::numeric_limits<uint64_t>::max(); // Must always succeed.
109         const VkAcquireProfilingLockInfoKHR lockInfo = {
110             VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
111             NULL,
112             0,
113             timeout,
114         };
115 
116         VK_CHECK(m_vkd.acquireProfilingLockKHR(m_device, &lockInfo));
117     }
118 
119     ~ProfilingLockGuard(void)
120     {
121         m_vkd.releaseProfilingLockKHR(m_device);
122     }
123 
124 protected:
125     const DeviceInterface &m_vkd;
126     const VkDevice m_device;
127 };
128 
129 using PerformanceCounterVec = std::vector<VkPerformanceCounterKHR>;
130 
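// Checks vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR: VK_INCOMPLETE handling,
// counter count consistency, UUID uniqueness and validity of the returned enums and description flags.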
131 class EnumerateAndValidateTest : public TestInstance
132 {
133 public:
134     EnumerateAndValidateTest(vkt::Context &context, VkQueueFlagBits queueFlagBits);
135     tcu::TestStatus iterate(void);
136 
137 protected:
138     void basicValidateCounter(const uint32_t familyIndex);
139 
140 private:
141     VkQueueFlagBits m_queueFlagBits;
142     bool m_requiredExtensionsPresent;
143 };
144 
145 EnumerateAndValidateTest::EnumerateAndValidateTest(vkt::Context &context, VkQueueFlagBits queueFlagBits)
146     : TestInstance(context)
147     , m_queueFlagBits(queueFlagBits)
148     , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
149 {
150 }
151 
152 tcu::TestStatus EnumerateAndValidateTest::iterate(void)
153 {
154     const InstanceInterface &vki          = m_context.getInstanceInterface();
155     const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
156     const std::vector<VkQueueFamilyProperties> queueProperties =
157         getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
158 
159     for (uint32_t queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
160     {
161         if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
162             continue;
163 
164         uint32_t counterCount = 0;
165         VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx,
166                                                                                    &counterCount, DE_NULL, DE_NULL));
167 
168         if (counterCount == 0)
169             continue;
170 
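        // Enumerate the counters themselves: request one entry fewer than reported to check that
        // VK_INCOMPLETE is returned, then read all of them and validate UUID uniqueness plus the
        // scope/storage/unit enum values.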
171         {
172             const VkPerformanceCounterKHR defaultCounterVal = initVulkanStructure();
173             PerformanceCounterVec counters(counterCount, defaultCounterVal);
174             uint32_t counterCountRead = counterCount;
175             std::map<std::string, size_t> uuidValidator;
176 
177             if (counterCount > 1)
178             {
179                 uint32_t incompleteCounterCount = counterCount - 1;
180                 VkResult result;
181 
182                 result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
183                     physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
184                 if (result != VK_INCOMPLETE)
185                     TCU_FAIL("VK_INCOMPLETE not returned");
186             }
187 
188             VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
189                 physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));
190 
191             if (counterCountRead != counterCount)
192                 TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) +
193                          ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
194 
195             for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
196             {
197                 const VkPerformanceCounterKHR &counter = counters[counterNdx];
198                 const std::string uuidStr              = uuidToHex(counter.uuid);
199 
200                 if (uuidValidator.find(uuidStr) != uuidValidator.end())
201                     TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
202                 else
203                     uuidValidator[uuidStr] = counterNdx;
204 
205                 if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
206                     TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));
207 
208                 if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
209                     TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));
210 
211                 if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
212                     TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
213             }
214         }
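        // Enumerate the counter descriptions and check that only the defined description flags are set.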
215         {
216             const VkPerformanceCounterDescriptionKHR defaultDescription = initVulkanStructure();
217             std::vector<VkPerformanceCounterDescriptionKHR> counterDescriptors(counterCount, defaultDescription);
218             uint32_t counterCountRead = counterCount;
219 
220             VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
221                 physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));
222 
223             if (counterCountRead != counterCount)
224                 TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) +
225                          ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");
226 
227             for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
228             {
229                 const VkPerformanceCounterDescriptionKHR &counterDescriptor = counterDescriptors[counterNdx];
230                 const VkPerformanceCounterDescriptionFlagsKHR allowedFlags =
231                     VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR |
232                     VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;
233 
234                 if ((counterDescriptor.flags & ~allowedFlags) != 0)
235                     TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
236             }
237         }
238     }
239 
240     return tcu::TestStatus::pass("Pass");
241 }
242 
243 using ResultsVec          = std::vector<VkPerformanceCounterResultKHR>;
244 using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
245 
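// Common base for the graphics and compute performance query tests: enumerates counters, creates
// query pools with a chosen counter subset and verifies that query results were actually written.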
246 class QueryTestBase : public TestInstance
247 {
248 public:
249     QueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);
250 
251 protected:
252     void setupCounters(void);
253     Move<VkQueryPool> createQueryPool(uint32_t enabledCounterOffset, uint32_t enabledCounterStride);
254     ResultsVec createResultsVector(const VkQueryPool pool) const;
255     BufferWithMemoryPtr createResultsBuffer(const ResultsVec &resultsVector) const;
256     void verifyQueryResults(uint32_t qfIndex, VkQueue queue, VkQueryPool queryPool) const;
257     uint32_t getRequiredPassCount(void) const;
258 
259 private:
260     const bool m_copyResults;
261     const uint32_t m_seed;
262     bool m_requiredExtensionsPresent;
263     uint32_t m_requiredNumberOfPasses;
264     std::map<uint64_t, uint32_t> m_enabledCountersCountMap; // number of counters that were enabled per query pool
265     PerformanceCounterVec m_counters;                       // counters provided by the device
266 };
267 
268 QueryTestBase::QueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
269     : TestInstance(context)
270     , m_copyResults(copyResults)
271     , m_seed(seed)
272     , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
273     , m_requiredNumberOfPasses(0)
274 {
275 }
276 
277 void QueryTestBase::setupCounters()
278 {
279     const InstanceInterface &vki          = m_context.getInstanceInterface();
280     const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
281     const auto queueFamilyIndex           = m_context.getUniversalQueueFamilyIndex();
282     const CmdPoolCreateInfo cmdPoolCreateInfo(queueFamilyIndex);
283     uint32_t counterCount;
284 
285     // Get the number of supported counters.
286     VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex,
287                                                                                &counterCount, nullptr, nullptr));
288 
289     // Get supported counters.
290     const VkPerformanceCounterKHR defaultCounterVal = initVulkanStructure();
291     m_counters.resize(counterCount, defaultCounterVal);
292     VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
293         physicalDevice, queueFamilyIndex, &counterCount, de::dataOrNull(m_counters), nullptr));
294 
295     // Filter out all counters with scope
296     // VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR. For these counters, the
297     // begin and end command must be at the beginning/end of the command buffer,
298     // which does not match what these tests do.
299     const auto scopeIsNotCmdBuffer = [](const VkPerformanceCounterKHR &c)
300     { return (c.scope != VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR); };
301     PerformanceCounterVec filteredCounters;
302 
303     filteredCounters.reserve(m_counters.size());
304     std::copy_if(begin(m_counters), end(m_counters), std::back_inserter(filteredCounters), scopeIsNotCmdBuffer);
305     m_counters.swap(filteredCounters);
306 
307     if (m_counters.empty())
308         TCU_THROW(NotSupportedError, "No counters without command buffer scope found");
309 }
310 
311 Move<VkQueryPool> QueryTestBase::createQueryPool(uint32_t enabledCounterOffset, uint32_t enabledCounterStride)
312 {
313     const InstanceInterface &vki              = m_context.getInstanceInterface();
314     const DeviceInterface &vkd                = m_context.getDeviceInterface();
315     const VkPhysicalDevice physicalDevice     = m_context.getPhysicalDevice();
316     const VkDevice device                     = m_context.getDevice();
317     const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
318     const uint32_t counterCount               = (uint32_t)m_counters.size();
319     uint32_t enabledIndex                     = enabledCounterOffset ? 0 : enabledCounterStride;
320     std::vector<uint32_t> enabledCounters;
321 
322     // enable every <enabledCounterStride> counter that has command or render pass scope
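    // With no offset, enabledIndex starts at the stride value so the first counter is enabled
    // immediately; a non-zero offset skips that many counters before the stride pattern starts.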
323     for (uint32_t i = 0; i < counterCount; i++)
324     {
325         // handle offset
326         if (enabledCounterOffset)
327         {
328             if (enabledCounterOffset == enabledIndex)
329             {
330                 // disable handling offset
331                 enabledCounterOffset = 0;
332 
333                 // enable next index in stride condition
334                 enabledIndex = enabledCounterStride;
335             }
336             else
337             {
338                 ++enabledIndex;
339                 continue;
340             }
341         }
342 
343         // handle stride
344         if (enabledIndex == enabledCounterStride)
345         {
346             enabledCounters.push_back(i);
347             enabledIndex = 0;
348         }
349         else
350             ++enabledIndex;
351     }
352 
353     // Get number of counters that were enabled for this query pool.
354     if (enabledCounters.empty())
355         TCU_THROW(NotSupportedError, "No suitable performance counters found for this test");
356 
357     const auto enabledCountersCount = de::sizeU32(enabledCounters);
358 
359     // define performance query
360     const VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
361         VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, NULL,
362         cmdPoolCreateInfo.queueFamilyIndex, // queue family that this performance query is performed on
363         enabledCountersCount,               // number of counters to enable
364         &enabledCounters[0]                 // array of indices of counters to enable
365     };
366 
367     // get the number of passes counters will require
368     vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo,
369                                                               &m_requiredNumberOfPasses);
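    // (the same command buffer is later submitted once per pass, selecting the pass via
    // VkPerformanceQuerySubmitInfoKHR::counterPassIndex)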
370 
371     // create query pool
372     const VkQueryPoolCreateInfo queryPoolCreateInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
373                                                        &performanceQueryCreateInfo,
374                                                        0,                                   // flags
375                                                        VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, // new query type
376                                                        1,                                   // queryCount
377                                                        0};
378 
379     Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);
380 
381     // memorize number of enabled counters for this query pool
382     m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;
383 
384     return queryPool;
385 }
386 
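// Build one result entry per counter enabled in the pool, pre-filled with random non-zero bytes so
// that verifyQueryResults can detect entries the implementation never overwrote.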
387 ResultsVec QueryTestBase::createResultsVector(const VkQueryPool pool) const
388 {
389     const auto itemCount = m_enabledCountersCountMap.at(pool.getInternal());
390     ResultsVec resultsVector(itemCount);
391     const auto byteSize = de::dataSize(resultsVector);
392     const auto contents = reinterpret_cast<uint8_t *>(resultsVector.data());
393     de::Random rnd(m_seed);
394 
395     // Fill vector with random bytes.
396     for (size_t i = 0u; i < byteSize; ++i)
397     {
398         const auto byte = rnd.getInt(1, 255); // Do not use zeros.
399         contents[i]     = static_cast<uint8_t>(byte);
400     }
401 
402     return resultsVector;
403 }
404 
405 BufferWithMemoryPtr QueryTestBase::createResultsBuffer(const ResultsVec &resultsVector) const
406 {
407     const auto &vkd       = m_context.getDeviceInterface();
408     const auto device     = m_context.getDevice();
409     auto &alloc           = m_context.getDefaultAllocator();
410     const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(resultsVector));
411     const auto createInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
412 
413     BufferWithMemoryPtr resultBuffer(
414         new BufferWithMemory(vkd, device, alloc, createInfo, MemoryRequirement::HostVisible));
415     auto &bufferAlloc = resultBuffer->getAllocation();
416     void *bufferData  = bufferAlloc.getHostPtr();
417 
418     deMemcpy(bufferData, resultsVector.data(), de::dataSize(resultsVector));
419     flushAlloc(vkd, device, bufferAlloc);
420 
421     return resultBuffer;
422 }
423 
424 void QueryTestBase::verifyQueryResults(uint32_t qfIndex, VkQueue queue, VkQueryPool queryPool) const
425 {
426     const DeviceInterface &vkd = m_context.getDeviceInterface();
427     const VkDevice device      = m_context.getDevice();
428 
429     const auto initialVector = createResultsVector(queryPool);
430     const auto resultsBuffer = createResultsBuffer(initialVector);
431     auto &resultsBufferAlloc = resultsBuffer->getAllocation();
432     void *resultsBufferData  = resultsBufferAlloc.getHostPtr();
433 
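    // With a single query in the pool, the stride covers all enabled counters, i.e. one
    // VkPerformanceCounterResultKHR per enabled counter.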
434     const auto resultsStride =
435         static_cast<VkDeviceSize>(sizeof(decltype(initialVector)::value_type) * initialVector.size());
436     const auto hostBufferSize = de::dataSize(initialVector);
437     const auto resultFlags    = static_cast<VkQueryResultFlags>(VK_QUERY_RESULT_WAIT_BIT);
438 
439     // Get or copy query pool results.
440     if (m_copyResults)
441     {
442         const auto cmdPool   = createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, qfIndex);
443         const auto cmdBuffer = allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
444         const auto barrier   = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
445 
446         beginCommandBuffer(vkd, *cmdBuffer);
447         vkd.cmdCopyQueryPoolResults(*cmdBuffer, queryPool, 0u, 1u, resultsBuffer->get(), 0ull, resultsStride,
448                                     resultFlags);
449         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier);
450         endCommandBuffer(vkd, *cmdBuffer);
451         submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
452         invalidateAlloc(vkd, device, resultsBufferAlloc);
453     }
454     else
455     {
456         VK_CHECK(vkd.getQueryPoolResults(device, queryPool, 0u, 1u, hostBufferSize, resultsBufferData, resultsStride,
457                                          resultFlags));
458     }
459 
460     // Check that the buffer was modified without analyzing result semantics.
461     ResultsVec resultsVector(initialVector.size());
462     deMemcpy(de::dataOrNull(resultsVector), resultsBufferData, hostBufferSize);
463 
464     for (size_t i = 0u; i < initialVector.size(); ++i)
465     {
466         if (deMemCmp(&initialVector[i], &resultsVector[i], sizeof(resultsVector[i])) == 0)
467         {
468             std::ostringstream msg;
469             msg << "Result " << i << " was not modified by the implementation";
470             TCU_FAIL(msg.str());
471         }
472     }
473 }
474 
475 uint32_t QueryTestBase::getRequiredPassCount() const
476 {
477     return m_requiredNumberOfPasses;
478 }
479 
480 // Base class for all graphic tests
481 class GraphicQueryTestBase : public QueryTestBase
482 {
483 public:
484     GraphicQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);
485 
486 protected:
487     void initStateObjects(void);
488 
489 protected:
490     Move<VkPipeline> m_pipeline;
491     Move<VkPipelineLayout> m_pipelineLayout;
492 
493     de::SharedPtr<Image> m_colorAttachmentImage;
494     Move<VkImageView> m_attachmentView;
495 
496     Move<VkRenderPass> m_renderPass;
497     Move<VkFramebuffer> m_framebuffer;
498 
499     de::SharedPtr<Buffer> m_vertexBuffer;
500 
501     VkFormat m_colorAttachmentFormat;
502     uint32_t m_size;
503 };
504 
505 GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
506     : QueryTestBase(context, copyResults, seed)
507     , m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
508     , m_size(32)
509 {
510 }
511 
512 void GraphicQueryTestBase::initStateObjects(void)
513 {
514     const VkDevice device      = m_context.getDevice();
515     const DeviceInterface &vkd = m_context.getDeviceInterface();
516 
517     //attachment images and views
518     {
519         VkExtent3D imageExtent = {
520             m_size, // width
521             m_size, // height
522             1       // depth
523         };
524 
525         const ImageCreateInfo colorImageCreateInfo(
526             VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1, VK_SAMPLE_COUNT_1_BIT,
527             VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
528 
529         m_colorAttachmentImage =
530             Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
531                                   m_context.getUniversalQueueFamilyIndex());
532 
533         const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D,
534                                                      m_colorAttachmentFormat);
535         m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
536     }
537 
538     // renderpass and framebuffer
539     {
540         RenderPassCreateInfo renderPassCreateInfo;
541         renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,          // format
542                                                                  VK_SAMPLE_COUNT_1_BIT,            // samples
543                                                                  VK_ATTACHMENT_LOAD_OP_CLEAR,      // loadOp
544                                                                  VK_ATTACHMENT_STORE_OP_DONT_CARE, // storeOp
545                                                                  VK_ATTACHMENT_LOAD_OP_DONT_CARE,  // stencilLoadOp
546                                                                  VK_ATTACHMENT_STORE_OP_DONT_CARE, // stencilStoreOp
547                                                                  VK_IMAGE_LAYOUT_GENERAL,          // initialLayout
548                                                                  VK_IMAGE_LAYOUT_GENERAL));        // finalLayout
549 
550         const VkAttachmentReference colorAttachmentReference = {
551             0,                      // attachment
552             VK_IMAGE_LAYOUT_GENERAL // layout
553         };
554 
555         renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
556                                                            0,                               // flags
557                                                            0,                               // inputCount
558                                                            DE_NULL,                         // pInputAttachments
559                                                            1,                               // colorCount
560                                                            &colorAttachmentReference,       // pColorAttachments
561                                                            DE_NULL,                         // pResolveAttachments
562                                                            AttachmentReference(),           // depthStencilAttachment
563                                                            0,                               // preserveCount
564                                                            DE_NULL));                       // preserveAttachments
565 
566         m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);
567 
568         std::vector<VkImageView> attachments(1);
569         attachments[0] = *m_attachmentView;
570 
571         FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
572         m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
573     }
574 
575     // pipeline
576     {
577         Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
578         Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));
579 
580         const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;
581 
582         const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
583         m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
584 
585         const VkVertexInputBindingDescription vf_binding_desc = {
586             0,                           // binding
587             4 * (uint32_t)sizeof(float), // stride
588             VK_VERTEX_INPUT_RATE_VERTEX  // inputRate
589         };
590 
591         const VkVertexInputAttributeDescription vf_attribute_desc = {
592             0,                             // location
593             0,                             // binding
594             VK_FORMAT_R32G32B32A32_SFLOAT, // format
595             0                              // offset
596         };
597 
598         const VkPipelineVertexInputStateCreateInfo vf_info = {
599             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // sType
600             NULL,                                                      // pNext
601             0u,                                                        // flags
602             1,                                                         // vertexBindingDescriptionCount
603             &vf_binding_desc,                                          // pVertexBindingDescriptions
604             1,                                                         // vertexAttributeDescriptionCount
605             &vf_attribute_desc                                         // pVertexAttributeDescriptions
606         };
607 
608         PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
609         pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
610         pipelineCreateInfo.addShader(
611             PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
612         pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
613         pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
614         const VkViewport viewport = makeViewport(m_size, m_size);
615         const VkRect2D scissor    = makeRect2D(m_size, m_size);
616         pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport),
617                                                                       std::vector<VkRect2D>(1, scissor)));
618         pipelineCreateInfo.addState(
619             PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
620         pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
621         pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
622         pipelineCreateInfo.addState(vf_info);
623         m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
624     }
625 
626     // vertex buffer
627     {
628         std::vector<tcu::Vec4> vertices(3);
629         vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
630         vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
631         vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);
632 
633         const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
634         m_vertexBuffer =
635             Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT),
636                                    m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
637 
638         tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4 *>(m_vertexBuffer->getBoundMemory().getHostPtr());
639         deMemcpy(ptr, &vertices[0], kBufferSize);
640 
641         flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
642     }
643 }
644 
645 class GraphicQueryTest : public GraphicQueryTestBase
646 {
647 public:
648     GraphicQueryTest(vkt::Context &context, bool copyResults, uint32_t seed);
649     tcu::TestStatus iterate(void);
650 };
651 
652 GraphicQueryTest::GraphicQueryTest(vkt::Context &context, bool copyResults, uint32_t seed)
653     : GraphicQueryTestBase(context, copyResults, seed)
654 {
655 }
656 
657 tcu::TestStatus GraphicQueryTest::iterate(void)
658 {
659     const DeviceInterface &vkd                = m_context.getDeviceInterface();
660     const VkDevice device                     = m_context.getDevice();
661     const VkQueue queue                       = m_context.getUniversalQueue();
662     const auto qfIndex                        = m_context.getUniversalQueueFamilyIndex();
663     const CmdPoolCreateInfo cmdPoolCreateInfo = qfIndex;
664     Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
665     Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
666 
667     initStateObjects();
668     setupCounters();
669 
670     vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
671 
672     {
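        // Hold the profiling lock from before command buffer recording until every counter pass
        // has been submitted and completed.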
673         const ProfilingLockGuard guard(vkd, device);
674 
675         // reset query pool
676         {
677             Unique<VkCommandBuffer> resetCmdBuffer(
678                 allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
679             const Unique<VkFence> fence(createFence(vkd, device));
680             const VkSubmitInfo submitInfo = {
681                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
682                 DE_NULL,                               // pNext
683                 0u,                                    // waitSemaphoreCount
684                 DE_NULL,                               // pWaitSemaphores
685                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
686                 1u,                                    // commandBufferCount
687                 &resetCmdBuffer.get(),                 // pCommandBuffers
688                 0u,                                    // signalSemaphoreCount
689                 DE_NULL,                               // pSignalSemaphores
690             };
691 
692             beginCommandBuffer(vkd, *resetCmdBuffer);
693             vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
694             endCommandBuffer(vkd, *resetCmdBuffer);
695 
696             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
697             VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
698         }
699 
700         // begin command buffer
701         beginCommandBuffer(vkd, *cmdBuffer, 0u);
702 
703         initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
704                                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
705                                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
706 
707         // begin render pass
708         VkClearValue renderPassClearValue;
709         deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
710 
711         // perform query during triangle draw
712         vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, 0u);
713 
714         beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_size, m_size), 1,
715                         &renderPassClearValue);
716 
717         // bind pipeline
718         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
719 
720         // bind vertex buffer
721         VkBuffer vertexBuffer                 = m_vertexBuffer->object();
722         const VkDeviceSize vertexBufferOffset = 0;
723         vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
724 
725         vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
726 
727         endRenderPass(vkd, *cmdBuffer);
728 
729         vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);
730 
731         transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
732                           VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
733                           VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
734                           VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
735 
736         endCommandBuffer(vkd, *cmdBuffer);
737 
738         // submit command buffer for each pass and wait for its completion
739         const auto requiredPassCount = getRequiredPassCount();
740         for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
741         {
742             const Unique<VkFence> fence(createFence(vkd, device));
743 
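            // counterPassIndex selects which counter pass this submission executes.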
744             VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
745                 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};
746 
747             const VkSubmitInfo submitInfo = {
748                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
749                 &performanceQuerySubmitInfo,           // pNext
750                 0u,                                    // waitSemaphoreCount
751                 DE_NULL,                               // pWaitSemaphores
752                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
753                 1u,                                    // commandBufferCount
754                 &cmdBuffer.get(),                      // pCommandBuffers
755                 0u,                                    // signalSemaphoreCount
756                 DE_NULL,                               // pSignalSemaphores
757             };
758 
759             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
760             VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
761         }
762     }
763 
764     VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
765 
766     verifyQueryResults(qfIndex, queue, *queryPool);
767     return tcu::TestStatus::pass("Pass");
768 }
769 
770 class GraphicMultiplePoolsTest : public GraphicQueryTestBase
771 {
772 public:
773     GraphicMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed);
774     tcu::TestStatus iterate(void);
775 };
776 
777 GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed)
778     : GraphicQueryTestBase(context, copyResults, seed)
779 {
780 }
781 
782 tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
783 {
784     const DeviceInterface &vkd                = m_context.getDeviceInterface();
785     const VkDevice device                     = m_context.getDevice();
786     const VkQueue queue                       = m_context.getUniversalQueue();
787     const auto qfIndex                        = m_context.getUniversalQueueFamilyIndex();
788     const CmdPoolCreateInfo cmdPoolCreateInfo = qfIndex;
789     Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
790     Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
791 
792     initStateObjects();
793     setupCounters();
794 
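    // Create two query pools that enable disjoint subsets of the supported counters.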
795     vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)), queryPool2(createQueryPool(1, 2));
796 
797     {
798         const ProfilingLockGuard guard(vkd, device);
799 
800         // reset query pools
801         {
802             Unique<VkCommandBuffer> resetCmdBuffer(
803                 allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
804             const Unique<VkFence> fence(createFence(vkd, device));
805             const VkSubmitInfo submitInfo = {
806                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
807                 DE_NULL,                               // pNext
808                 0u,                                    // waitSemaphoreCount
809                 DE_NULL,                               // pWaitSemaphores
810                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
811                 1u,                                    // commandBufferCount
812                 &resetCmdBuffer.get(),                 // pCommandBuffers
813                 0u,                                    // signalSemaphoreCount
814                 DE_NULL,                               // pSignalSemaphores
815             };
816 
817             beginCommandBuffer(vkd, *resetCmdBuffer);
818             vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
819             vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
820             endCommandBuffer(vkd, *resetCmdBuffer);
821 
822             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
823             VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
824         }
825 
826         // begin command buffer
827         beginCommandBuffer(vkd, *cmdBuffer, 0u);
828 
829         initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
830                                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
831                                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
832 
833         // begin render pass
834         VkClearValue renderPassClearValue;
835         deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
836 
837         VkBuffer vertexBuffer                 = m_vertexBuffer->object();
838         const VkDeviceSize vertexBufferOffset = 0;
839         const VkQueryPool queryPools[]        = {*queryPool1, *queryPool2};
840 
841         // perform two queries during triangle draw
842         for (uint32_t loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
843         {
844             const VkQueryPool queryPool = queryPools[loop];
845             vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
846             beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_size, m_size), 1,
847                             &renderPassClearValue);
848 
849             vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
850             vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
851             vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);
852 
853             endRenderPass(vkd, *cmdBuffer);
854             vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
855         }
856 
857         transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
858                           VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
859                           VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
860                           VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
861 
862         endCommandBuffer(vkd, *cmdBuffer);
863 
864         // submit command buffer for each pass and wait for its completion
865         const auto requiredPassCount = getRequiredPassCount();
866         for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
867         {
868             const Unique<VkFence> fence(createFence(vkd, device));
869 
870             VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
871                 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};
872 
873             const VkSubmitInfo submitInfo = {
874                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
875                 &performanceQuerySubmitInfo,           // pNext
876                 0u,                                    // waitSemaphoreCount
877                 DE_NULL,                               // pWaitSemaphores
878                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
879                 1u,                                    // commandBufferCount
880                 &cmdBuffer.get(),                      // pCommandBuffers
881                 0u,                                    // signalSemaphoreCount
882                 DE_NULL,                               // pSignalSemaphores
883             };
884 
885             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
886             VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
887         }
888     }
889 
890     VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
891 
892     verifyQueryResults(qfIndex, queue, *queryPool1);
893     verifyQueryResults(qfIndex, queue, *queryPool2);
894     return tcu::TestStatus::pass("Pass");
895 }
896 
897 // Base class for all compute tests
898 class ComputeQueryTestBase : public QueryTestBase
899 {
900 public:
901     ComputeQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);
902 
903 protected:
904     void initStateObjects(void);
905 
906 protected:
907     Move<VkPipeline> m_pipeline;
908     Move<VkPipelineLayout> m_pipelineLayout;
909     de::SharedPtr<Buffer> m_buffer;
910     Move<VkDescriptorPool> m_descriptorPool;
911     Move<VkDescriptorSet> m_descriptorSet;
912     VkDescriptorBufferInfo m_descriptorBufferInfo;
913     VkBufferMemoryBarrier m_computeFinishBarrier;
914 };
915 
916 ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
917     : QueryTestBase(context, copyResults, seed)
918 {
919 }
920 
921 void ComputeQueryTestBase::initStateObjects(void)
922 {
923     const DeviceInterface &vkd    = m_context.getDeviceInterface();
924     const VkDevice device         = m_context.getDevice();
925     const VkDeviceSize bufferSize = 32 * sizeof(uint32_t);
926     const CmdPoolCreateInfo cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
927     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
928     const Unique<VkCommandBuffer> cmdBuffer(
929         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
930 
931     const Unique<VkDescriptorSetLayout> descriptorSetLayout(
932         DescriptorSetLayoutBuilder()
933             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
934             .build(vkd, device));
935 
936     // create pipeline layout
937     {
938         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
939             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
940             DE_NULL,                                       // pNext
941             0u,                                            // flags
942             1u,                                            // setLayoutCount
943             &(*descriptorSetLayout),                       // pSetLayouts
944             0u,                                            // pushConstantRangeCount
945             DE_NULL,                                       // pPushConstantRanges
946         };
947         m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
948     }
949 
950     // create compute pipeline
951     {
952         const Unique<VkShaderModule> cs(
953             createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
954         const VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
955             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
956             DE_NULL,                                             // pNext
957             (VkPipelineShaderStageCreateFlags)0u,                // flags
958             VK_SHADER_STAGE_COMPUTE_BIT,                         // stage
959             *cs,                                                 // module
960             "main",                                              // pName
961             DE_NULL,                                             // pSpecializationInfo
962         };
963         const VkComputePipelineCreateInfo pipelineCreateInfo = {
964             VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
965             DE_NULL,                                        // pNext
966             (VkPipelineCreateFlags)0u,                      // flags
967             pipelineShaderStageParams,                      // stage
968             *m_pipelineLayout,                              // layout
969             DE_NULL,                                        // basePipelineHandle
970             0,                                              // basePipelineIndex
971         };
972         m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
973     }
974 
975     m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
976                                       m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
977     m_descriptorPool = DescriptorPoolBuilder()
978                            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
979                            .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
980     const VkDescriptorSetAllocateInfo allocateParams = {
981         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
982         DE_NULL,                                        // pNext
983         *m_descriptorPool,                              // descriptorPool
984         1u,                                             // setLayoutCount
985         &(*descriptorSetLayout),                        // pSetLayouts
986     };
987 
988     m_descriptorSet                             = allocateDescriptorSet(vkd, device, &allocateParams);
989     const VkDescriptorBufferInfo descriptorInfo = {
990         m_buffer->object(), // buffer
991         0ull,               // offset
992         bufferSize,         // range
993     };
994 
995     DescriptorSetUpdateBuilder()
996         .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
997                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
998         .update(vkd, device);
999 
1000     // clear buffer
1001     const std::vector<uint8_t> data((size_t)bufferSize, 0u);
1002     const Allocation &allocation = m_buffer->getBoundMemory();
1003     void *allocationData         = allocation.getHostPtr();
1004     invalidateAlloc(vkd, device, allocation);
1005     deMemcpy(allocationData, &data[0], (size_t)bufferSize);
1006 
1007     const VkBufferMemoryBarrier barrier = {
1008         VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,                // sType
1009         DE_NULL,                                                // pNext
1010         VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, // srcAccessMask
1011         VK_ACCESS_HOST_READ_BIT,                                // dstAccessMask
1012         VK_QUEUE_FAMILY_IGNORED,                                // srcQueueFamilyIndex
1013         VK_QUEUE_FAMILY_IGNORED,                                // destQueueFamilyIndex
1014         m_buffer->object(),                                     // buffer
1015         0ull,                                                   // offset
1016         bufferSize,                                             // size
1017     };
1018     m_computeFinishBarrier = barrier;
1019 }
1020 
1021 class ComputeQueryTest : public ComputeQueryTestBase
1022 {
1023 public:
1024     ComputeQueryTest(vkt::Context &context, bool copyResults, uint32_t seed);
1025     tcu::TestStatus iterate(void);
1026 };
1027 
1028 ComputeQueryTest::ComputeQueryTest(vkt::Context &context, bool copyResults, uint32_t seed)
1029     : ComputeQueryTestBase(context, copyResults, seed)
1030 {
1031 }
1032 
1033 tcu::TestStatus ComputeQueryTest::iterate(void)
1034 {
1035     const DeviceInterface &vkd = m_context.getDeviceInterface();
1036     const VkDevice device      = m_context.getDevice();
1037     const VkQueue queue        = m_context.getUniversalQueue();
1038     const auto qfIndex         = m_context.getUniversalQueueFamilyIndex();
1039     const CmdPoolCreateInfo cmdPoolCreateInfo(qfIndex);
1040     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1041     const Unique<VkCommandBuffer> resetCmdBuffer(
1042         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1043     const Unique<VkCommandBuffer> cmdBuffer(
1044         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1045 
1046     initStateObjects();
1047     setupCounters();
1048 
1049     vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));
1050 
1051     {
1052         const ProfilingLockGuard guard(vkd, device);
1053 
1054         beginCommandBuffer(vkd, *resetCmdBuffer);
1055         vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
1056         endCommandBuffer(vkd, *resetCmdBuffer);
1057 
1058         beginCommandBuffer(vkd, *cmdBuffer, 0u);
1059         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
1060         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
1061                                   &(m_descriptorSet.get()), 0u, DE_NULL);
1062 
1063         vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
1064         vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
1065         vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);
1066 
1067         vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1068                                (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier *)DE_NULL, 1u, &m_computeFinishBarrier,
1069                                0u, (const VkImageMemoryBarrier *)DE_NULL);
1070         endCommandBuffer(vkd, *cmdBuffer);
1071 
1072         // submit reset of queries only once
1073         {
1074             const VkSubmitInfo submitInfo = {
1075                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
1076                 DE_NULL,                               // pNext
1077                 0u,                                    // waitSemaphoreCount
1078                 DE_NULL,                               // pWaitSemaphores
1079                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
1080                 1u,                                    // commandBufferCount
1081                 &resetCmdBuffer.get(),                 // pCommandBuffers
1082                 0u,                                    // signalSemaphoreCount
1083                 DE_NULL,                               // pSignalSemaphores
1084             };
1085 
1086             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
1087         }
1088 
1089         // submit command buffer for each pass and wait for its completion
1090         const auto requiredPassCount = getRequiredPassCount();
1091         for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
1092         {
1093             const Unique<VkFence> fence(createFence(vkd, device));
1094 
1095             VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
1096                 VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};
1097 
1098             const VkSubmitInfo submitInfo = {
1099                 VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
1100                 &performanceQuerySubmitInfo,           // pNext
1101                 0u,                                    // waitSemaphoreCount
1102                 DE_NULL,                               // pWaitSemaphores
1103                 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
1104                 1u,                                    // commandBufferCount
1105                 &cmdBuffer.get(),                      // pCommandBuffers
1106                 0u,                                    // signalSemaphoreCount
1107                 DE_NULL,                               // pSignalSemaphores
1108             };
1109 
1110             VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
1111             VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
1112         }
1113     }
1114 
1115     VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));
1116 
1117     verifyQueryResults(qfIndex, queue, *queryPool);
1118     return tcu::TestStatus::pass("Pass");
1119 }
1120 
1121 class ComputeMultiplePoolsTest : public ComputeQueryTestBase
1122 {
1123 public:
1124     ComputeMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed);
1125     tcu::TestStatus iterate(void);
1126 };
1127 
1128 ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed)
1129     : ComputeQueryTestBase(context, copyResults, seed)
1130 {
1131 }
1132 
1133 tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
1134 {
1135     const DeviceInterface &vkd = m_context.getDeviceInterface();
1136     const VkDevice device      = m_context.getDevice();
1137     const VkQueue queue        = m_context.getUniversalQueue();
1138     const auto qfIndex         = m_context.getUniversalQueueFamilyIndex();
1139     const CmdPoolCreateInfo cmdPoolCreateInfo(qfIndex);
1140     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
1141     const Unique<VkCommandBuffer> resetCmdBuffer(
1142         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1143     const Unique<VkCommandBuffer> cmdBuffer(
1144         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1145 
1146     initStateObjects();
1147     setupCounters();
1148 
1149     vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)), queryPool2(createQueryPool(1, 2));
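    // Two pools, each enabling a different subset of the available counters; recording queries
    // from more than one performance query pool in the same primary command buffer requires the
    // performanceCounterMultipleQueryPools feature (checked in checkSupport).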

    {
        const ProfilingLockGuard guard(vkd, device);

        const VkQueryPool queryPools[] = {*queryPool1, *queryPool2};

        beginCommandBuffer(vkd, *resetCmdBuffer);
        vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
        vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
        endCommandBuffer(vkd, *resetCmdBuffer);

        beginCommandBuffer(vkd, *cmdBuffer, 0u);
        vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
                                  &(m_descriptorSet.get()), 0u, DE_NULL);

        // perform one query in each pool
        for (uint32_t loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
        {
            const VkQueryPool queryPool = queryPools[loop];
            vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
            vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
            vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
        }

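        // Make the compute shader writes available to the host before the results are read back.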
        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                               (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier *)DE_NULL, 1u, &m_computeFinishBarrier,
                               0u, (const VkImageMemoryBarrier *)DE_NULL);
        endCommandBuffer(vkd, *cmdBuffer);

        // submit reset of queries only once
        {
            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                DE_NULL,                               // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &resetCmdBuffer.get(),                 // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
        }

        // submit command buffer for each pass and wait for its completion
        const auto requiredPassCount = getRequiredPassCount();
        for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
        {
            const Unique<VkFence> fence(createFence(vkd, device));

            VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
                VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};

            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                &performanceQuerySubmitInfo,           // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &cmdBuffer.get(),                      // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }
    }

    VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

    verifyQueryResults(qfIndex, queue, *queryPool1);
    verifyQueryResults(qfIndex, queue, *queryPool2);
    return tcu::TestStatus::pass("Pass");
}

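// TT_ENUMERATE_AND_VALIDATE enumerates the queue family's performance counters and validates
// the reported data, TT_QUERY records a single performance query pool, and TT_MULTIPLE_POOLS
// records two pools within the same command buffer.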
enum TestType
{
    TT_ENUMERATE_AND_VALIDATE = 0,
    TT_QUERY,
    TT_MULTIPLE_POOLS
};

class QueryPoolPerformanceTest : public TestCase
{
public:
    QueryPoolPerformanceTest(tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits,
                             bool copyResults, uint32_t seed, const std::string &name)
        : TestCase(context, name)
        , m_testType(testType)
        , m_queueFlagBits(queueFlagBits)
        , m_copyResults(copyResults)
        , m_seed(seed)
    {
    }

    vkt::TestInstance *createInstance(vkt::Context &context) const override
    {
        if (m_testType == TT_ENUMERATE_AND_VALIDATE)
            return new EnumerateAndValidateTest(context, m_queueFlagBits);

        if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
        {
            if (m_testType == TT_QUERY)
                return new GraphicQueryTest(context, m_copyResults, m_seed);
            return new GraphicMultiplePoolsTest(context, m_copyResults, m_seed);
        }

        // tests for VK_QUEUE_COMPUTE_BIT
        if (m_testType == TT_QUERY)
            return new ComputeQueryTest(context, m_copyResults, m_seed);
        return new ComputeMultiplePoolsTest(context, m_copyResults, m_seed);
    }

    void initPrograms(SourceCollections &programCollection) const override
    {
        // the enumerate-and-validate tests do not need programs
        if (m_testType == TT_ENUMERATE_AND_VALIDATE)
            return;

        if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
        {
            programCollection.glslSources.add("comp")
                << glu::ComputeSource("#version 430\n"
                                      "layout (local_size_x = 1) in;\n"
                                      "layout(binding = 0) writeonly buffer Output {\n"
                                      "        uint values[];\n"
                                      "} sb_out;\n\n"
                                      "void main (void) {\n"
                                      "        uint index = uint(gl_GlobalInvocationID.x);\n"
                                      "        sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
                                      "}\n");
            return;
        }

        programCollection.glslSources.add("frag")
            << glu::FragmentSource("#version 430\n"
                                   "layout(location = 0) out vec4 out_FragColor;\n"
                                   "void main()\n"
                                   "{\n"
                                   "    out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
                                   "}\n");

        programCollection.glslSources.add("vert")
            << glu::VertexSource("#version 430\n"
                                 "layout(location = 0) in vec4 in_Position;\n"
                                 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
                                 "void main() {\n"
                                 "    gl_Position  = in_Position;\n"
                                 "    gl_PointSize = 1.0;\n"
                                 "}\n");
    }

    void checkSupport(Context &context) const override
    {
        const auto &perfQueryFeatures = context.getPerformanceQueryFeatures();

        if (!perfQueryFeatures.performanceCounterQueryPools)
            TCU_THROW(NotSupportedError, "performanceCounterQueryPools not supported");

        if (m_testType == TT_MULTIPLE_POOLS && !perfQueryFeatures.performanceCounterMultipleQueryPools)
            TCU_THROW(NotSupportedError, "performanceCounterMultipleQueryPools not supported");

        const auto &vki           = context.getInstanceInterface();
        const auto physicalDevice = context.getPhysicalDevice();
        const auto qfIndex        = context.getUniversalQueueFamilyIndex();

        // Get the number of supported counters.
        uint32_t counterCount;
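        // Passing NULL for both pCounters and pCounterDescriptions retrieves only the count.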
        VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, qfIndex,
                                                                                   &counterCount, NULL, NULL));

        if (!counterCount)
            TCU_THROW(QualityWarning, "There are no performance counters");

        if (m_copyResults && !context.getPerformanceQueryProperties().allowCommandBufferQueryCopies)
            TCU_THROW(NotSupportedError,
                      "VkPhysicalDevicePerformanceQueryPropertiesKHR::allowCommandBufferQueryCopies not supported");
    }

private:
    TestType m_testType;
    VkQueueFlagBits m_queueFlagBits;
    const bool m_copyResults;
    const uint32_t m_seed;
};

} // namespace

QueryPoolPerformanceTests::QueryPoolPerformanceTests(tcu::TestContext &testCtx)
    : TestCaseGroup(testCtx, "performance_query")
{
}

void QueryPoolPerformanceTests::init(void)
{
    const struct
    {
        const bool copyResults;
        const std::string suffix;
    } copyCases[]{
        {false, ""},
        {true, "_copy"},
    };
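    // The "_copy" variants copy query results on the device with vkCmdCopyQueryPoolResults
    // (requires allowCommandBufferQueryCopies) instead of reading them back on the host.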

    uint32_t seed = 1692187611u;
    for (const auto &copyCase : copyCases)
    {
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT,
                                              copyCase.copyResults, seed++,
                                              "enumerate_and_validate_graphic" + copyCase.suffix));
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT,
                                              copyCase.copyResults, seed++,
                                              "enumerate_and_validate_compute" + copyCase.suffix));
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, copyCase.copyResults, seed++,
                                              "query_graphic" + copyCase.suffix));
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, copyCase.copyResults, seed++,
                                              "query_compute" + copyCase.suffix));
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, copyCase.copyResults,
                                              seed++, "multiple_pools_graphic" + copyCase.suffix));
        addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, copyCase.copyResults,
                                              seed++, "multiple_pools_compute" + copyCase.suffix));
    }
}

} // namespace QueryPool
} // namespace vkt