1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Vulkan Performance Query Tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktQueryPoolPerformanceTests.hpp"
25 #include "vktTestCase.hpp"
26
27 #include "vktDrawImageObjectUtil.hpp"
28 #include "vktDrawBufferObjectUtil.hpp"
29 #include "vktDrawCreateInfoUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPrograms.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38
39 #include "deMath.h"
40 #include "deRandom.hpp"
41
42 #include "tcuTestLog.hpp"
43 #include "tcuResource.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "vkImageUtil.hpp"
46 #include "tcuCommandLine.hpp"
47 #include "tcuRGBA.hpp"
48
49 #include <algorithm>
50 #include <iterator>
51
52 namespace vkt
53 {
54 namespace QueryPool
55 {
56 namespace
57 {
58
59 using namespace vk;
60 using namespace Draw;
61
uuidToHex(const uint8_t uuid[])62 std::string uuidToHex(const uint8_t uuid[])
63 {
64 const size_t bytesPerPart[] = {4, 2, 2, 2, 6};
65 const uint8_t *ptr = &uuid[0];
66 const size_t stringSize = VK_UUID_SIZE * 2 + DE_LENGTH_OF_ARRAY(bytesPerPart) - 1;
67 std::string result;
68
69 result.reserve(stringSize);
70
71 for (size_t partNdx = 0; partNdx < DE_LENGTH_OF_ARRAY(bytesPerPart); ++partNdx)
72 {
73 const size_t bytesInPart = bytesPerPart[partNdx];
74 const size_t symbolsInPart = 2 * bytesInPart;
75 uint64_t part = 0;
76 std::string partString;
77
78 for (size_t byteInPartNdx = 0; byteInPartNdx < bytesInPart; ++byteInPartNdx)
79 {
80 part = (part << 8) | *ptr;
81 ++ptr;
82 }
83
84 partString = tcu::toHex(part).toString();
85
86 DE_ASSERT(partString.size() > symbolsInPart);
87
88 result +=
89 (symbolsInPart >= partString.size()) ? partString : partString.substr(partString.size() - symbolsInPart);
90
91 if (partNdx + 1 != DE_LENGTH_OF_ARRAY(bytesPerPart))
92 result += '-';
93 }
94
95 DE_ASSERT(ptr == &uuid[VK_UUID_SIZE]);
96 DE_ASSERT(result.size() == stringSize);
97
98 return result;
99 }
100
101 // Helper class to acquire and release the profiling lock in an orderly manner.
102 // If an exception is thrown from a test (e.g. from VK_CHECK), the profiling lock is still released.
103 class ProfilingLockGuard
104 {
105 public:
ProfilingLockGuard(const DeviceInterface & vkd,const VkDevice device)106 ProfilingLockGuard(const DeviceInterface &vkd, const VkDevice device) : m_vkd(vkd), m_device(device)
107 {
108 const auto timeout = std::numeric_limits<uint64_t>::max(); // Must always succeed.
109 const VkAcquireProfilingLockInfoKHR lockInfo = {
110 VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
111 NULL,
112 0,
113 timeout,
114 };
115
116 VK_CHECK(m_vkd.acquireProfilingLockKHR(m_device, &lockInfo));
117 }
118
~ProfilingLockGuard(void)119 ~ProfilingLockGuard(void)
120 {
121 m_vkd.releaseProfilingLockKHR(m_device);
122 }
123
124 protected:
125 const DeviceInterface &m_vkd;
126 const VkDevice m_device;
127 };
128
// Convenience alias for a list of performance counter structures.
using PerformanceCounterVec = std::vector<VkPerformanceCounterKHR>;
130
// Enumerates the performance counters of every queue family matching the given
// queue flag bits and performs basic validation of the returned counter and
// counter-description structures (UUID uniqueness, enum ranges, flags).
class EnumerateAndValidateTest : public TestInstance
{
public:
    EnumerateAndValidateTest(vkt::Context &context, VkQueueFlagBits queueFlagBits);
    tcu::TestStatus iterate(void);

protected:
    void basicValidateCounter(const uint32_t familyIndex);

private:
    // Capability bit a queue family must expose to be validated.
    VkQueueFlagBits m_queueFlagBits;
    // Set as a side effect of requiring VK_KHR_performance_query in the constructor.
    bool m_requiredExtensionsPresent;
};
144
// Requires VK_KHR_performance_query device functionality up front; presumably
// requireDeviceFunctionality throws NotSupportedError when absent — verify
// against the Context implementation.
EnumerateAndValidateTest::EnumerateAndValidateTest(vkt::Context &context, VkQueueFlagBits queueFlagBits)
    : TestInstance(context)
    , m_queueFlagBits(queueFlagBits)
    , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
{
}
151
// For each queue family matching m_queueFlagBits: enumerates the performance
// counters twice (counter structs and descriptions), checks VK_INCOMPLETE
// handling, counter-count stability, UUID uniqueness, enum value ranges and
// description flag validity.
tcu::TestStatus EnumerateAndValidateTest::iterate(void)
{
    const InstanceInterface &vki = m_context.getInstanceInterface();
    const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
    const std::vector<VkQueueFamilyProperties> queueProperties =
        getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);

    for (uint32_t queueNdx = 0; queueNdx < queueProperties.size(); queueNdx++)
    {
        // Only validate queue families with the requested capability.
        if ((queueProperties[queueNdx].queueFlags & m_queueFlagBits) == 0)
            continue;

        // First call with null arrays only queries the number of counters.
        uint32_t counterCount = 0;
        VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueNdx,
                                                                                  &counterCount, DE_NULL, DE_NULL));

        if (counterCount == 0)
            continue;

        {
            const VkPerformanceCounterKHR defaultCounterVal = initVulkanStructure();
            PerformanceCounterVec counters(counterCount, defaultCounterVal);
            uint32_t counterCountRead = counterCount;
            std::map<std::string, size_t> uuidValidator;

            // Requesting fewer counters than available must yield VK_INCOMPLETE.
            if (counterCount > 1)
            {
                uint32_t incompleteCounterCount = counterCount - 1;
                VkResult result;

                result = vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
                    physicalDevice, queueNdx, &incompleteCounterCount, &counters[0], DE_NULL);
                if (result != VK_INCOMPLETE)
                    TCU_FAIL("VK_INCOMPLETE not returned");
            }

            VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
                physicalDevice, queueNdx, &counterCountRead, &counters[0], DE_NULL));

            // The full query must report the same count as the initial query.
            if (counterCountRead != counterCount)
                TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) +
                         ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

            for (size_t counterNdx = 0; counterNdx < counters.size(); ++counterNdx)
            {
                const VkPerformanceCounterKHR &counter = counters[counterNdx];
                const std::string uuidStr = uuidToHex(counter.uuid);

                // Every counter must have a unique UUID within the queue family.
                if (uuidValidator.find(uuidStr) != uuidValidator.end())
                    TCU_FAIL("Duplicate counter UUID detected " + uuidStr);
                else
                    uuidValidator[uuidStr] = counterNdx;

                // Enum values must stay within the extension-defined ranges.
                if (counter.scope >= VK_PERFORMANCE_COUNTER_SCOPE_KHR_LAST)
                    TCU_FAIL("Counter scope is invalid " + de::toString(static_cast<size_t>(counter.scope)));

                if (counter.storage >= VK_PERFORMANCE_COUNTER_STORAGE_KHR_LAST)
                    TCU_FAIL("Counter storage is invalid " + de::toString(static_cast<size_t>(counter.storage)));

                if (counter.unit >= VK_PERFORMANCE_COUNTER_UNIT_KHR_LAST)
                    TCU_FAIL("Counter unit is invalid " + de::toString(static_cast<size_t>(counter.unit)));
            }
        }
        {
            // Second pass: fetch the counter descriptions and validate their flags.
            const VkPerformanceCounterDescriptionKHR defaultDescription = initVulkanStructure();
            std::vector<VkPerformanceCounterDescriptionKHR> counterDescriptors(counterCount, defaultDescription);
            uint32_t counterCountRead = counterCount;

            VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
                physicalDevice, queueNdx, &counterCountRead, DE_NULL, &counterDescriptors[0]));

            if (counterCountRead != counterCount)
                TCU_FAIL("Number of counters read (" + de::toString(counterCountRead) +
                         ") is not equal to number of counters reported (" + de::toString(counterCount) + ")");

            for (size_t counterNdx = 0; counterNdx < counterDescriptors.size(); ++counterNdx)
            {
                const VkPerformanceCounterDescriptionKHR &counterDescriptor = counterDescriptors[counterNdx];
                // Only these two description flag bits are defined by the extension.
                const VkPerformanceCounterDescriptionFlagsKHR allowedFlags =
                    VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR |
                    VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR;

                if ((counterDescriptor.flags & ~allowedFlags) != 0)
                    TCU_FAIL("Invalid flags present in VkPerformanceCounterDescriptionFlagsKHR");
            }
        }
    }

    return tcu::TestStatus::pass("Pass");
}
242
// Host-side storage for performance query results and a RAII buffer handle.
using ResultsVec = std::vector<VkPerformanceCounterResultKHR>;
using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
245
// Common base for performance query tests: discovers/filters the available
// counters, creates performance query pools and verifies that the
// implementation wrote every result slot.
class QueryTestBase : public TestInstance
{
public:
    QueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);

protected:
    void setupCounters(void);
    Move<VkQueryPool> createQueryPool(uint32_t enabledCounterOffset, uint32_t enabledCounterStride);
    ResultsVec createResultsVector(const VkQueryPool pool) const;
    BufferWithMemoryPtr createResultsBuffer(const ResultsVec &resultsVector) const;
    void verifyQueryResults(uint32_t qfIndex, VkQueue queue, VkQueryPool queryPool) const;
    uint32_t getRequiredPassCount(void) const;

private:
    const bool m_copyResults;                               // true: vkCmdCopyQueryPoolResults, false: vkGetQueryPoolResults
    const uint32_t m_seed;                                  // seed for the random pre-fill pattern
    bool m_requiredExtensionsPresent;                       // side effect of requiring VK_KHR_performance_query
    uint32_t m_requiredNumerOfPasses;                       // passes needed by the last created pool
    std::map<uint64_t, uint32_t> m_enabledCountersCountMap; // number of counters that were enabled per query pool
    PerformanceCounterVec m_counters;                       // counters provided by the device
};
267
// Requires VK_KHR_performance_query; copyResults selects the result-retrieval
// path and seed drives the random buffer pre-fill used for verification.
QueryTestBase::QueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
    : TestInstance(context)
    , m_copyResults(copyResults)
    , m_seed(seed)
    , m_requiredExtensionsPresent(context.requireDeviceFunctionality("VK_KHR_performance_query"))
    , m_requiredNumerOfPasses(0)
{
}
276
setupCounters()277 void QueryTestBase::setupCounters()
278 {
279 const InstanceInterface &vki = m_context.getInstanceInterface();
280 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
281 const auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
282 const CmdPoolCreateInfo cmdPoolCreateInfo(queueFamilyIndex);
283 uint32_t counterCount;
284
285 // Get the number of supported counters.
286 VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, queueFamilyIndex,
287 &counterCount, nullptr, nullptr));
288
289 // Get supported counters.
290 const VkPerformanceCounterKHR defaultCounterVal = initVulkanStructure();
291 m_counters.resize(counterCount, defaultCounterVal);
292 VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
293 physicalDevice, queueFamilyIndex, &counterCount, de::dataOrNull(m_counters), nullptr));
294
295 // Filter out all counters with scope
296 // VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR. For these counters, the
297 // begin and end command must be at the beginning/end of the command buffer,
298 // which does not match what these tests do.
299 const auto scopeIsNotCmdBuffer = [](const VkPerformanceCounterKHR &c)
300 { return (c.scope != VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR); };
301 PerformanceCounterVec filteredCounters;
302
303 filteredCounters.reserve(m_counters.size());
304 std::copy_if(begin(m_counters), end(m_counters), std::back_inserter(filteredCounters), scopeIsNotCmdBuffer);
305 m_counters.swap(filteredCounters);
306
307 if (m_counters.empty())
308 TCU_THROW(NotSupportedError, "No counters without command buffer scope found");
309 }
310
// Creates a performance query pool that enables every
// <enabledCounterStride>-th counter starting after an initial run of
// <enabledCounterOffset> skipped counters, records the number of submit passes
// the selection requires (m_requiredNumerOfPasses) and remembers how many
// counters the pool enables. Throws NotSupportedError if nothing is selected.
Move<VkQueryPool> QueryTestBase::createQueryPool(uint32_t enabledCounterOffset, uint32_t enabledCounterStride)
{
    const InstanceInterface &vki = m_context.getInstanceInterface();
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
    const VkDevice device = m_context.getDevice();
    const CmdPoolCreateInfo cmdPoolCreateInfo = m_context.getUniversalQueueFamilyIndex();
    const uint32_t counterCount = (uint32_t)m_counters.size();
    // Starting at stride means the very first counter is selected when no offset is requested.
    uint32_t enabledIndex = enabledCounterOffset ? 0 : enabledCounterStride;
    std::vector<uint32_t> enabledCounters;

    // enable every <enabledCounterStride> counter that has command or render pass scope
    for (uint32_t i = 0; i < counterCount; i++)
    {
        // handle offset
        if (enabledCounterOffset)
        {
            if (enabledCounterOffset == enabledIndex)
            {
                // disable handling offset
                enabledCounterOffset = 0;

                // enable next index in stride condition
                enabledIndex = enabledCounterStride;
            }
            else
            {
                ++enabledIndex;
                continue;
            }
        }

        // handle stride
        if (enabledIndex == enabledCounterStride)
        {
            enabledCounters.push_back(i);
            enabledIndex = 0;
        }
        else
            ++enabledIndex;
    }

    // Get number of counters that were enabled for this query pool.
    if (enabledCounters.empty())
        TCU_THROW(NotSupportedError, "No suitable performance counters found for this test");

    const auto enabledCountersCount = de::sizeU32(enabledCounters);

    // define performance query
    const VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
        VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, NULL,
        cmdPoolCreateInfo.queueFamilyIndex, // queue family that this performance query is performed on
        enabledCountersCount,               // number of counters to enable
        &enabledCounters[0]                 // array of indices of counters to enable
    };

    // get the number of passes counters will require
    vki.getPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo,
                                                              &m_requiredNumerOfPasses);

    // create query pool
    const VkQueryPoolCreateInfo queryPoolCreateInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
                                                       &performanceQueryCreateInfo,
                                                       0,                                   // flags
                                                       VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, // new query type
                                                       1,                                   // queryCount
                                                       0};

    Move<VkQueryPool> queryPool = vk::createQueryPool(vkd, device, &queryPoolCreateInfo);

    // memorize number of enabled counters for this query pool
    m_enabledCountersCountMap[queryPool.get().getInternal()] = enabledCountersCount;

    return queryPool;
}
386
createResultsVector(const VkQueryPool pool) const387 ResultsVec QueryTestBase::createResultsVector(const VkQueryPool pool) const
388 {
389 const auto itemCount = m_enabledCountersCountMap.at(pool.getInternal());
390 ResultsVec resultsVector(itemCount);
391 const auto byteSize = de::dataSize(resultsVector);
392 const auto contents = reinterpret_cast<uint8_t *>(resultsVector.data());
393 de::Random rnd(m_seed);
394
395 // Fill vector with random bytes.
396 for (size_t i = 0u; i < byteSize; ++i)
397 {
398 const auto byte = rnd.getInt(1, 255); // Do not use zeros.
399 contents[i] = static_cast<uint8_t>(byte);
400 }
401
402 return resultsVector;
403 }
404
createResultsBuffer(const ResultsVec & resultsVector) const405 BufferWithMemoryPtr QueryTestBase::createResultsBuffer(const ResultsVec &resultsVector) const
406 {
407 const auto &vkd = m_context.getDeviceInterface();
408 const auto device = m_context.getDevice();
409 auto &alloc = m_context.getDefaultAllocator();
410 const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(resultsVector));
411 const auto createInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
412
413 BufferWithMemoryPtr resultBuffer(
414 new BufferWithMemory(vkd, device, alloc, createInfo, MemoryRequirement::HostVisible));
415 auto &bufferAlloc = resultBuffer->getAllocation();
416 void *bufferData = bufferAlloc.getHostPtr();
417
418 deMemcpy(bufferData, resultsVector.data(), de::dataSize(resultsVector));
419 flushAlloc(vkd, device, bufferAlloc);
420
421 return resultBuffer;
422 }
423
// Retrieves the query results either on the device via
// vkCmdCopyQueryPoolResults (when m_copyResults) or on the host via
// vkGetQueryPoolResults, then fails the test if any result slot still equals
// its random pre-fill value, i.e. was never written by the implementation.
void QueryTestBase::verifyQueryResults(uint32_t qfIndex, VkQueue queue, VkQueryPool queryPool) const
{
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();

    // Host copy of the random pattern and a device buffer initialized with it.
    const auto initialVector = createResultsVector(queryPool);
    const auto resultsBuffer = createResultsBuffer(initialVector);
    auto &resultsBufferAlloc = resultsBuffer->getAllocation();
    void *resultsBufferData = resultsBufferAlloc.getHostPtr();

    // One query, all enabled counters packed contiguously.
    const auto resultsStride =
        static_cast<VkDeviceSize>(sizeof(decltype(initialVector)::value_type) * initialVector.size());
    const auto hostBufferSize = de::dataSize(initialVector);
    const auto resultFlags = static_cast<VkQueryResultFlags>(VK_QUERY_RESULT_WAIT_BIT);

    // Get or copy query pool results.
    if (m_copyResults)
    {
        const auto cmdPool = createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, qfIndex);
        const auto cmdBuffer = allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
        // Make the transfer write visible to the host read below.
        const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);

        beginCommandBuffer(vkd, *cmdBuffer);
        vkd.cmdCopyQueryPoolResults(*cmdBuffer, queryPool, 0u, 1u, resultsBuffer->get(), 0ull, resultsStride,
                                    resultFlags);
        cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier);
        endCommandBuffer(vkd, *cmdBuffer);
        submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
        invalidateAlloc(vkd, device, resultsBufferAlloc);
    }
    else
    {
        VK_CHECK(vkd.getQueryPoolResults(device, queryPool, 0u, 1u, hostBufferSize, resultsBufferData, resultsStride,
                                         resultFlags));
    }

    // Check that the buffer was modified without analyzing result semantics.
    ResultsVec resultsVector(initialVector.size());
    deMemcpy(de::dataOrNull(resultsVector), resultsBufferData, hostBufferSize);

    for (size_t i = 0u; i < initialVector.size(); ++i)
    {
        // A slot equal to its random initial value means it was never written.
        if (deMemCmp(&initialVector[i], &resultsVector[i], sizeof(resultsVector[i])) == 0)
        {
            std::ostringstream msg;
            msg << "Result " << i << " was not modified by the implementation";
            TCU_FAIL(msg.str());
        }
    }
}
474
// Returns the number of submit passes required by the counters enabled in the
// most recent createQueryPool() call, as reported by
// vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR.
uint32_t QueryTestBase::getRequiredPassCount() const
{
    return m_requiredNumerOfPasses;
}
479
// Base class for all graphic tests
class GraphicQueryTestBase : public QueryTestBase
{
public:
    GraphicQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);

protected:
    // Creates the color attachment, render pass, framebuffer, pipeline and
    // vertex buffer shared by the graphics test variants.
    void initStateObjects(void);

protected:
    Move<VkPipeline> m_pipeline;
    Move<VkPipelineLayout> m_pipelineLayout;

    de::SharedPtr<Image> m_colorAttachmentImage;
    Move<VkImageView> m_attachmentView;

    Move<VkRenderPass> m_renderPass;
    Move<VkFramebuffer> m_framebuffer;

    de::SharedPtr<Buffer> m_vertexBuffer;

    VkFormat m_colorAttachmentFormat; // format of the color attachment
    uint32_t m_size;                  // width and height of the square render target
};
504
// Fixes the render target to a 32x32 RGBA8 image; actual Vulkan objects are
// created later in initStateObjects().
GraphicQueryTestBase::GraphicQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
    : QueryTestBase(context, copyResults, seed)
    , m_colorAttachmentFormat(VK_FORMAT_R8G8B8A8_UNORM)
    , m_size(32)
{
}
511
// Builds all static rendering state used by the graphics query tests:
// a single-sampled color attachment with view, a one-subpass render pass plus
// framebuffer, a basic vertex/fragment pipeline, and a three-vertex triangle
// in a host-visible vertex buffer.
void GraphicQueryTestBase::initStateObjects(void)
{
    const VkDevice device = m_context.getDevice();
    const DeviceInterface &vkd = m_context.getDeviceInterface();

    //attachment images and views
    {
        VkExtent3D imageExtent = {
            m_size, // width
            m_size, // height
            1       // depth
        };

        const ImageCreateInfo colorImageCreateInfo(
            VK_IMAGE_TYPE_2D, m_colorAttachmentFormat, imageExtent, 1, 1, VK_SAMPLE_COUNT_1_BIT,
            VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

        m_colorAttachmentImage =
            Image::createAndAlloc(vkd, device, colorImageCreateInfo, m_context.getDefaultAllocator(),
                                  m_context.getUniversalQueueFamilyIndex());

        const ImageViewCreateInfo attachmentViewInfo(m_colorAttachmentImage->object(), VK_IMAGE_VIEW_TYPE_2D,
                                                     m_colorAttachmentFormat);
        m_attachmentView = createImageView(vkd, device, &attachmentViewInfo);
    }

    // renderpass and framebuffer
    {
        RenderPassCreateInfo renderPassCreateInfo;
        renderPassCreateInfo.addAttachment(AttachmentDescription(m_colorAttachmentFormat,       // format
                                                                 VK_SAMPLE_COUNT_1_BIT,         // samples
                                                                 VK_ATTACHMENT_LOAD_OP_CLEAR,   // loadOp
                                                                 VK_ATTACHMENT_STORE_OP_DONT_CARE, // storeOp
                                                                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,  // stencilLoadOp
                                                                 VK_ATTACHMENT_STORE_OP_DONT_CARE, // stencilStoreOp
                                                                 VK_IMAGE_LAYOUT_GENERAL,          // initialLayout
                                                                 VK_IMAGE_LAYOUT_GENERAL));        // finalLayout

        const VkAttachmentReference colorAttachmentReference = {
            0,                      // attachment
            VK_IMAGE_LAYOUT_GENERAL // layout
        };

        renderPassCreateInfo.addSubpass(SubpassDescription(VK_PIPELINE_BIND_POINT_GRAPHICS,  // pipelineBindPoint
                                                           0,                         // flags
                                                           0,                         // inputCount
                                                           DE_NULL,                   // pInputAttachments
                                                           1,                         // colorCount
                                                           &colorAttachmentReference, // pColorAttachments
                                                           DE_NULL,                   // pResolveAttachments
                                                           AttachmentReference(),     // depthStencilAttachment
                                                           0,                         // preserveCount
                                                           DE_NULL));                 // preserveAttachments

        m_renderPass = createRenderPass(vkd, device, &renderPassCreateInfo);

        std::vector<VkImageView> attachments(1);
        attachments[0] = *m_attachmentView;

        FramebufferCreateInfo framebufferCreateInfo(*m_renderPass, attachments, m_size, m_size, 1);
        m_framebuffer = createFramebuffer(vkd, device, &framebufferCreateInfo);
    }

    // pipeline
    {
        Unique<VkShaderModule> vs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0));
        Unique<VkShaderModule> fs(createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0));

        const PipelineCreateInfo::ColorBlendState::Attachment attachmentState;

        const PipelineLayoutCreateInfo pipelineLayoutCreateInfo;
        m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);

        // One binding: tightly packed vec4 positions.
        const VkVertexInputBindingDescription vf_binding_desc = {
            0,                           // binding
            4 * (uint32_t)sizeof(float), // stride
            VK_VERTEX_INPUT_RATE_VERTEX  // inputRate
        };

        const VkVertexInputAttributeDescription vf_attribute_desc = {
            0,                             // location
            0,                             // binding
            VK_FORMAT_R32G32B32A32_SFLOAT, // format
            0                              // offset
        };

        const VkPipelineVertexInputStateCreateInfo vf_info = {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // sType
            NULL,                                                      // pNext
            0u,                                                        // flags
            1,                                                         // vertexBindingDescriptionCount
            &vf_binding_desc,                                          // pVertexBindingDescriptions
            1,                                                         // vertexAttributeDescriptionCount
            &vf_attribute_desc                                         // pVertexAttributeDescriptions
        };

        PipelineCreateInfo pipelineCreateInfo(*m_pipelineLayout, *m_renderPass, 0, 0);
        pipelineCreateInfo.addShader(PipelineCreateInfo::PipelineShaderStage(*vs, "main", VK_SHADER_STAGE_VERTEX_BIT));
        pipelineCreateInfo.addShader(
            PipelineCreateInfo::PipelineShaderStage(*fs, "main", VK_SHADER_STAGE_FRAGMENT_BIT));
        pipelineCreateInfo.addState(PipelineCreateInfo::InputAssemblerState(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST));
        pipelineCreateInfo.addState(PipelineCreateInfo::ColorBlendState(1, &attachmentState));
        const VkViewport viewport = makeViewport(m_size, m_size);
        const VkRect2D scissor = makeRect2D(m_size, m_size);
        pipelineCreateInfo.addState(PipelineCreateInfo::ViewportState(1, std::vector<VkViewport>(1, viewport),
                                                                      std::vector<VkRect2D>(1, scissor)));
        // Depth/stencil testing disabled; the compare op is unused.
        pipelineCreateInfo.addState(
            PipelineCreateInfo::DepthStencilState(false, false, VK_COMPARE_OP_GREATER_OR_EQUAL));
        pipelineCreateInfo.addState(PipelineCreateInfo::RasterizerState());
        pipelineCreateInfo.addState(PipelineCreateInfo::MultiSampleState());
        pipelineCreateInfo.addState(vf_info);
        m_pipeline = createGraphicsPipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
    }

    // vertex buffer
    {
        // A single triangle in the lower-right quadrant of clip space.
        std::vector<tcu::Vec4> vertices(3);
        vertices[0] = tcu::Vec4(0.5, 0.5, 0.0, 1.0);
        vertices[1] = tcu::Vec4(0.5, 0.0, 0.0, 1.0);
        vertices[2] = tcu::Vec4(0.0, 0.5, 0.0, 1.0);

        const size_t kBufferSize = vertices.size() * sizeof(tcu::Vec4);
        m_vertexBuffer =
            Buffer::createAndAlloc(vkd, device, BufferCreateInfo(kBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT),
                                   m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);

        tcu::Vec4 *ptr = reinterpret_cast<tcu::Vec4 *>(m_vertexBuffer->getBoundMemory().getHostPtr());
        deMemcpy(ptr, &vertices[0], kBufferSize);

        flushAlloc(vkd, device, m_vertexBuffer->getBoundMemory());
    }
}
644
// Performs a single performance query around one triangle draw and verifies
// that every enabled counter result was written.
class GraphicQueryTest : public GraphicQueryTestBase
{
public:
    GraphicQueryTest(vkt::Context &context, bool copyResults, uint32_t seed);
    tcu::TestStatus iterate(void);
};
651
// All state creation happens in iterate(); the constructor only forwards.
GraphicQueryTest::GraphicQueryTest(vkt::Context &context, bool copyResults, uint32_t seed)
    : GraphicQueryTestBase(context, copyResults, seed)
{
}
656
// Records a command buffer that wraps a triangle draw in a performance query,
// submits it once per required counter pass while holding the profiling lock,
// and finally verifies the query results.
tcu::TestStatus GraphicQueryTest::iterate(void)
{
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const auto qfIndex = m_context.getUniversalQueueFamilyIndex();
    const CmdPoolCreateInfo cmdPoolCreateInfo = qfIndex;
    Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
    Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    initStateObjects();
    setupCounters();

    // Enable every available (non-command-buffer-scope) counter.
    vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));

    {
        // The profiling lock must be held while recording and submitting
        // performance queries; released automatically on scope exit.
        const ProfilingLockGuard guard(vkd, device);

        // reset query pool
        {
            Unique<VkCommandBuffer> resetCmdBuffer(
                allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
            const Unique<VkFence> fence(createFence(vkd, device));
            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,          // sType
                DE_NULL,                                // pNext
                0u,                                     // waitSemaphoreCount
                DE_NULL,                                // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL,  // pWaitDstStageMask
                1u,                                     // commandBufferCount
                &resetCmdBuffer.get(),                  // pCommandBuffers
                0u,                                     // signalSemaphoreCount
                DE_NULL,                                // pSignalSemaphores
            };

            beginCommandBuffer(vkd, *resetCmdBuffer);
            vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
            endCommandBuffer(vkd, *resetCmdBuffer);

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }

        // begin command buffer
        beginCommandBuffer(vkd, *cmdBuffer, 0u);

        initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
                                      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);

        // begin render pass
        VkClearValue renderPassClearValue;
        deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));

        // perform query during triangle draw; the query brackets the whole
        // render pass, which is valid for the non-command-buffer-scope
        // counters selected in setupCounters()
        vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, 0u);

        beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_size, m_size), 1,
                        &renderPassClearValue);

        // bind pipeline
        vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);

        // bind vertex buffer
        VkBuffer vertexBuffer = m_vertexBuffer->object();
        const VkDeviceSize vertexBufferOffset = 0;
        vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);

        vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);

        endRenderPass(vkd, *cmdBuffer);

        vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0);

        transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
                          VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                          VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
                          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

        endCommandBuffer(vkd, *cmdBuffer);

        // submit command buffer for each pass and wait for its completion;
        // the pass index is communicated via VkPerformanceQuerySubmitInfoKHR
        const auto requiredPassCount = getRequiredPassCount();
        for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
        {
            const Unique<VkFence> fence(createFence(vkd, device));

            VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
                VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};

            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,          // sType
                &performanceQuerySubmitInfo,            // pNext
                0u,                                     // waitSemaphoreCount
                DE_NULL,                                // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL,  // pWaitDstStageMask
                1u,                                     // commandBufferCount
                &cmdBuffer.get(),                       // pCommandBuffers
                0u,                                     // signalSemaphoreCount
                DE_NULL,                                // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }
    }

    VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

    verifyQueryResults(qfIndex, queue, *queryPool);
    return tcu::TestStatus::pass("Pass");
}
769
// Like GraphicQueryTest but records two interleaved performance queries into
// two query pools (disjoint counter subsets) within one command buffer.
class GraphicMultiplePoolsTest : public GraphicQueryTestBase
{
public:
    GraphicMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed);
    tcu::TestStatus iterate(void);
};
776
// All state creation happens in iterate(); the constructor only forwards.
GraphicMultiplePoolsTest::GraphicMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed)
    : GraphicQueryTestBase(context, copyResults, seed)
{
}
781
tcu::TestStatus GraphicMultiplePoolsTest::iterate(void)
{
    // Records one performance query from each of two query pools, each query
    // wrapped around its own render pass drawing a single triangle, then
    // re-submits the command buffer once per required counter pass and
    // verifies the results of both pools.
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const auto qfIndex = m_context.getUniversalQueueFamilyIndex();
    const CmdPoolCreateInfo cmdPoolCreateInfo = qfIndex;
    Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
    Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    initStateObjects();
    setupCounters();

    // Two separate pools; using both in one frame exercises
    // performanceCounterMultipleQueryPools support.
    vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)), queryPool2(createQueryPool(1, 2));

    {
        // Performance queries may only be recorded/submitted while the
        // profiling lock is held; the guard releases it on scope exit.
        const ProfilingLockGuard guard(vkd, device);

        // Reset both query pools with a separate one-shot command buffer and
        // wait for it so the main command buffer starts from reset queries.
        {
            Unique<VkCommandBuffer> resetCmdBuffer(
                allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
            const Unique<VkFence> fence(createFence(vkd, device));
            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                DE_NULL,                               // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &resetCmdBuffer.get(),                 // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            beginCommandBuffer(vkd, *resetCmdBuffer);
            vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
            vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
            endCommandBuffer(vkd, *resetCmdBuffer);

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }

        // Record the main command buffer once; it is replayed below for every
        // counter pass.
        beginCommandBuffer(vkd, *cmdBuffer, 0u);

        initialTransitionColor2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_LAYOUT_GENERAL,
                                      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);

        // begin render pass
        VkClearValue renderPassClearValue;
        deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));

        VkBuffer vertexBuffer = m_vertexBuffer->object();
        const VkDeviceSize vertexBufferOffset = 0;
        const VkQueryPool queryPools[] = {*queryPool1, *queryPool2};

        // Perform two queries (one per pool), each wrapping a full render
        // pass that draws one triangle.
        for (uint32_t loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
        {
            const VkQueryPool queryPool = queryPools[loop];
            vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
            beginRenderPass(vkd, *cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_size, m_size), 1,
                            &renderPassClearValue);

            vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
            vkd.cmdBindVertexBuffers(*cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
            vkd.cmdDraw(*cmdBuffer, 3, 1, 0, 0);

            endRenderPass(vkd, *cmdBuffer);
            vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
        }

        // Make the color attachment readable as a transfer source after
        // rendering completes.
        transition2DImage(vkd, *cmdBuffer, m_colorAttachmentImage->object(), VK_IMAGE_ASPECT_COLOR_BIT,
                          VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                          VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
                          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

        endCommandBuffer(vkd, *cmdBuffer);

        // Submit the same command buffer once per counter pass (selected via
        // VkPerformanceQuerySubmitInfoKHR::counterPassIndex) and wait for
        // each submission to complete.
        const auto requiredPassCount = getRequiredPassCount();
        for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
        {
            const Unique<VkFence> fence(createFence(vkd, device));

            VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
                VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};

            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                &performanceQuerySubmitInfo,           // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &cmdBuffer.get(),                      // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }
    }

    VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

    verifyQueryResults(qfIndex, queue, *queryPool1);
    verifyQueryResults(qfIndex, queue, *queryPool2);
    return tcu::TestStatus::pass("Pass");
}
896
897 // Base class for all compute tests
898 class ComputeQueryTestBase : public QueryTestBase
899 {
900 public:
901 ComputeQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed);
902
903 protected:
904 void initStateObjects(void);
905
906 protected:
907 Move<VkPipeline> m_pipeline;
908 Move<VkPipelineLayout> m_pipelineLayout;
909 de::SharedPtr<Buffer> m_buffer;
910 Move<VkDescriptorPool> m_descriptorPool;
911 Move<VkDescriptorSet> m_descriptorSet;
912 VkDescriptorBufferInfo m_descriptorBufferInfo;
913 VkBufferMemoryBarrier m_computeFinishBarrier;
914 };
915
// Simply forwards construction parameters to the shared query-test base.
ComputeQueryTestBase::ComputeQueryTestBase(vkt::Context &context, bool copyResults, uint32_t seed)
    : QueryTestBase(context, copyResults, seed)
{
}
920
initStateObjects(void)921 void ComputeQueryTestBase::initStateObjects(void)
922 {
923 const DeviceInterface &vkd = m_context.getDeviceInterface();
924 const VkDevice device = m_context.getDevice();
925 const VkDeviceSize bufferSize = 32 * sizeof(uint32_t);
926 const CmdPoolCreateInfo cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
927 const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
928 const Unique<VkCommandBuffer> cmdBuffer(
929 allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
930
931 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
932 DescriptorSetLayoutBuilder()
933 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
934 .build(vkd, device));
935
936 // create pipeline layout
937 {
938 const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
939 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
940 DE_NULL, // pNext
941 0u, // flags
942 1u, // setLayoutCount
943 &(*descriptorSetLayout), // pSetLayouts
944 0u, // pushConstantRangeCount
945 DE_NULL, // pPushConstantRanges
946 };
947 m_pipelineLayout = createPipelineLayout(vkd, device, &pipelineLayoutParams);
948 }
949
950 // create compute pipeline
951 {
952 const Unique<VkShaderModule> cs(
953 createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u));
954 const VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
955 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
956 DE_NULL, // pNext
957 (VkPipelineShaderStageCreateFlags)0u, // flags
958 VK_SHADER_STAGE_COMPUTE_BIT, // stage
959 *cs, // module
960 "main", // pName
961 DE_NULL, // pSpecializationInfo
962 };
963 const VkComputePipelineCreateInfo pipelineCreateInfo = {
964 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
965 DE_NULL, // pNext
966 (VkPipelineCreateFlags)0u, // flags
967 pipelineShaderStageParams, // stage
968 *m_pipelineLayout, // layout
969 DE_NULL, // basePipelineHandle
970 0, // basePipelineIndex
971 };
972 m_pipeline = createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo);
973 }
974
975 m_buffer = Buffer::createAndAlloc(vkd, device, BufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
976 m_context.getDefaultAllocator(), MemoryRequirement::HostVisible);
977 m_descriptorPool = DescriptorPoolBuilder()
978 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
979 .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
980 const VkDescriptorSetAllocateInfo allocateParams = {
981 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
982 DE_NULL, // pNext
983 *m_descriptorPool, // descriptorPool
984 1u, // setLayoutCount
985 &(*descriptorSetLayout), // pSetLayouts
986 };
987
988 m_descriptorSet = allocateDescriptorSet(vkd, device, &allocateParams);
989 const VkDescriptorBufferInfo descriptorInfo = {
990 m_buffer->object(), // buffer
991 0ull, // offset
992 bufferSize, // range
993 };
994
995 DescriptorSetUpdateBuilder()
996 .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
997 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
998 .update(vkd, device);
999
1000 // clear buffer
1001 const std::vector<uint8_t> data((size_t)bufferSize, 0u);
1002 const Allocation &allocation = m_buffer->getBoundMemory();
1003 void *allocationData = allocation.getHostPtr();
1004 invalidateAlloc(vkd, device, allocation);
1005 deMemcpy(allocationData, &data[0], (size_t)bufferSize);
1006
1007 const VkBufferMemoryBarrier barrier = {
1008 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1009 DE_NULL, // pNext
1010 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, // srcAccessMask
1011 VK_ACCESS_HOST_READ_BIT, // dstAccessMask
1012 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1013 VK_QUEUE_FAMILY_IGNORED, // destQueueFamilyIndex
1014 m_buffer->object(), // buffer
1015 0ull, // offset
1016 bufferSize, // size
1017 };
1018 m_computeFinishBarrier = barrier;
1019 }
1020
// Single-pool compute test: one performance query around one dispatch.
class ComputeQueryTest : public ComputeQueryTestBase
{
public:
    ComputeQueryTest(vkt::Context &context, bool copyResults, uint32_t seed);
    tcu::TestStatus iterate(void);
};
1027
// Simply forwards construction parameters to the compute base class.
ComputeQueryTest::ComputeQueryTest(vkt::Context &context, bool copyResults, uint32_t seed)
    : ComputeQueryTestBase(context, copyResults, seed)
{
}
1032
tcu::TestStatus ComputeQueryTest::iterate(void)
{
    // Records one performance query around a single compute dispatch, then
    // re-submits the command buffer once per required counter pass and
    // verifies the query results.
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const auto qfIndex = m_context.getUniversalQueueFamilyIndex();
    const CmdPoolCreateInfo cmdPoolCreateInfo(qfIndex);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
    const Unique<VkCommandBuffer> resetCmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    initStateObjects();
    setupCounters();

    vk::Unique<VkQueryPool> queryPool(createQueryPool(0, 1));

    {
        // Performance queries may only be recorded/submitted while the
        // profiling lock is held; the guard releases it on scope exit.
        const ProfilingLockGuard guard(vkd, device);

        // Record the query-pool reset in its own command buffer.
        beginCommandBuffer(vkd, *resetCmdBuffer);
        vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
        endCommandBuffer(vkd, *resetCmdBuffer);

        // Main command buffer: one query around one 2x2x2 dispatch, followed
        // by a barrier making the shader writes visible to the host.
        beginCommandBuffer(vkd, *cmdBuffer, 0u);
        vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
                                  &(m_descriptorSet.get()), 0u, DE_NULL);

        vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0u, (VkQueryControlFlags)0u);
        vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
        vkd.cmdEndQuery(*cmdBuffer, *queryPool, 0u);

        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                               (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier *)DE_NULL, 1u, &m_computeFinishBarrier,
                               0u, (const VkImageMemoryBarrier *)DE_NULL);
        endCommandBuffer(vkd, *cmdBuffer);

        // Submit the reset of queries only once, without a fence; queue
        // submission order guarantees it executes before the passes below.
        {
            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                DE_NULL,                               // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &resetCmdBuffer.get(),                 // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
        }

        // Submit the same command buffer once per counter pass (selected via
        // VkPerformanceQuerySubmitInfoKHR::counterPassIndex) and wait for
        // each submission to complete.
        const auto requiredPassCount = getRequiredPassCount();
        for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
        {
            const Unique<VkFence> fence(createFence(vkd, device));

            VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
                VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};

            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                &performanceQuerySubmitInfo,           // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &cmdBuffer.get(),                      // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }
    }

    VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

    verifyQueryResults(qfIndex, queue, *queryPool);
    return tcu::TestStatus::pass("Pass");
}
1120
// Multi-pool compute test: two performance queries from two different query
// pools, each around its own dispatch.
class ComputeMultiplePoolsTest : public ComputeQueryTestBase
{
public:
    ComputeMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed);
    tcu::TestStatus iterate(void);
};
1127
// Simply forwards construction parameters to the compute base class.
ComputeMultiplePoolsTest::ComputeMultiplePoolsTest(vkt::Context &context, bool copyResults, uint32_t seed)
    : ComputeQueryTestBase(context, copyResults, seed)
{
}
1132
tcu::TestStatus ComputeMultiplePoolsTest::iterate(void)
{
    // Records one performance query from each of two query pools, each around
    // its own compute dispatch, then re-submits the command buffer once per
    // required counter pass and verifies both pools.
    const DeviceInterface &vkd = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const auto qfIndex = m_context.getUniversalQueueFamilyIndex();
    const CmdPoolCreateInfo cmdPoolCreateInfo(qfIndex);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
    const Unique<VkCommandBuffer> resetCmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    initStateObjects();
    setupCounters();

    // Two separate pools; using both in one frame exercises
    // performanceCounterMultipleQueryPools support.
    vk::Unique<VkQueryPool> queryPool1(createQueryPool(0, 2)), queryPool2(createQueryPool(1, 2));

    {
        // Performance queries may only be recorded/submitted while the
        // profiling lock is held; the guard releases it on scope exit.
        const ProfilingLockGuard guard(vkd, device);

        const VkQueryPool queryPools[] = {*queryPool1, *queryPool2};

        // Record the query-pool resets in their own command buffer.
        beginCommandBuffer(vkd, *resetCmdBuffer);
        vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
        vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
        endCommandBuffer(vkd, *resetCmdBuffer);

        beginCommandBuffer(vkd, *cmdBuffer, 0u);
        vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
                                  &(m_descriptorSet.get()), 0u, DE_NULL);

        // Perform two queries (one per pool), each wrapping a 2x2x2 dispatch.
        for (uint32_t loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
        {
            const VkQueryPool queryPool = queryPools[loop];
            vkd.cmdBeginQuery(*cmdBuffer, queryPool, 0u, (VkQueryControlFlags)0u);
            vkd.cmdDispatch(*cmdBuffer, 2, 2, 2);
            vkd.cmdEndQuery(*cmdBuffer, queryPool, 0u);
        }

        // Make the shader writes to the SSBO visible to subsequent host reads.
        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                               (VkDependencyFlags)0u, 0u, (const VkMemoryBarrier *)DE_NULL, 1u, &m_computeFinishBarrier,
                               0u, (const VkImageMemoryBarrier *)DE_NULL);
        endCommandBuffer(vkd, *cmdBuffer);

        // Submit the reset of queries only once, without a fence; queue
        // submission order guarantees it executes before the passes below.
        {
            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                DE_NULL,                               // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &resetCmdBuffer.get(),                 // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
        }

        // Submit the same command buffer once per counter pass (selected via
        // VkPerformanceQuerySubmitInfoKHR::counterPassIndex) and wait for
        // each submission to complete.
        const auto requiredPassCount = getRequiredPassCount();
        for (uint32_t passIndex = 0; passIndex < requiredPassCount; passIndex++)
        {
            const Unique<VkFence> fence(createFence(vkd, device));

            VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
                VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, passIndex};

            const VkSubmitInfo submitInfo = {
                VK_STRUCTURE_TYPE_SUBMIT_INFO,         // sType
                &performanceQuerySubmitInfo,           // pNext
                0u,                                    // waitSemaphoreCount
                DE_NULL,                               // pWaitSemaphores
                (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
                1u,                                    // commandBufferCount
                &cmdBuffer.get(),                      // pCommandBuffers
                0u,                                    // signalSemaphoreCount
                DE_NULL,                               // pSignalSemaphores
            };

            VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
            VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), true, ~0ull));
        }
    }

    VK_CHECK(vkd.resetCommandBuffer(*cmdBuffer, 0));

    verifyQueryResults(qfIndex, queue, *queryPool1);
    verifyQueryResults(qfIndex, queue, *queryPool2);
    return tcu::TestStatus::pass("Pass");
}
1229
// Selects which test instance QueryPoolPerformanceTest::createInstance builds.
enum TestType
{
    TT_ENUMERATE_AND_VALIDATE = 0, // enumerate counters and validate their properties
    TT_QUERY,                      // single query pool
    TT_MULTIPLE_POOLS              // two query pools used in the same frame
};
1236
// Test case wrapper: selects the concrete test instance from the TestType and
// queue flag, provides the shaders, and checks VK_KHR_performance_query
// feature support.
class QueryPoolPerformanceTest : public TestCase
{
public:
    QueryPoolPerformanceTest(tcu::TestContext &context, TestType testType, VkQueueFlagBits queueFlagBits,
                             bool copyResults, uint32_t seed, const std::string &name)
        : TestCase(context, name)
        , m_testType(testType)
        , m_queueFlagBits(queueFlagBits)
        , m_copyResults(copyResults)
        , m_seed(seed)
    {
    }

    // Maps (m_testType, m_queueFlagBits) to one of the five test instance
    // classes defined above.
    vkt::TestInstance *createInstance(vkt::Context &context) const override
    {
        if (m_testType == TT_ENUMERATE_AND_VALIDATE)
            return new EnumerateAndValidateTest(context, m_queueFlagBits);

        if (m_queueFlagBits == VK_QUEUE_GRAPHICS_BIT)
        {
            if (m_testType == TT_QUERY)
                return new GraphicQueryTest(context, m_copyResults, m_seed);
            return new GraphicMultiplePoolsTest(context, m_copyResults, m_seed);
        }

        // tests for VK_QUEUE_COMPUTE_BIT
        if (m_testType == TT_QUERY)
            return new ComputeQueryTest(context, m_copyResults, m_seed);
        return new ComputeMultiplePoolsTest(context, m_copyResults, m_seed);
    }

    // Adds either the "comp" shader (compute variants) or the "vert"/"frag"
    // pair (graphics variants); validation tests need no programs.
    void initPrograms(SourceCollections &programCollection) const override
    {
        // validation test do not need programs
        if (m_testType == TT_ENUMERATE_AND_VALIDATE)
            return;

        if (m_queueFlagBits == VK_QUEUE_COMPUTE_BIT)
        {
            programCollection.glslSources.add("comp")
                << glu::ComputeSource("#version 430\n"
                                      "layout (local_size_x = 1) in;\n"
                                      "layout(binding = 0) writeonly buffer Output {\n"
                                      "    uint values[];\n"
                                      "} sb_out;\n\n"
                                      "void main (void) {\n"
                                      "    uint index = uint(gl_GlobalInvocationID.x);\n"
                                      "    sb_out.values[index] += gl_GlobalInvocationID.y*2;\n"
                                      "}\n");
            return;
        }

        programCollection.glslSources.add("frag")
            << glu::FragmentSource("#version 430\n"
                                   "layout(location = 0) out vec4 out_FragColor;\n"
                                   "void main()\n"
                                   "{\n"
                                   "    out_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
                                   "}\n");

        programCollection.glslSources.add("vert")
            << glu::VertexSource("#version 430\n"
                                 "layout(location = 0) in vec4 in_Position;\n"
                                 "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
                                 "void main() {\n"
                                 "    gl_Position = in_Position;\n"
                                 "    gl_PointSize = 1.0;\n"
                                 "}\n");
    }

    // Throws NotSupportedError (or QualityWarning when no counters exist)
    // unless the required VK_KHR_performance_query features are present.
    void checkSupport(Context &context) const override
    {
        const auto &perfQueryFeatures = context.getPerformanceQueryFeatures();

        if (!perfQueryFeatures.performanceCounterQueryPools)
            TCU_THROW(NotSupportedError, "performanceCounterQueryPools not supported");

        if (m_testType == TT_MULTIPLE_POOLS && !perfQueryFeatures.performanceCounterMultipleQueryPools)
            TCU_THROW(NotSupportedError, "performanceCounterMultipleQueryPools not supported");

        const auto &vki = context.getInstanceInterface();
        const auto physicalDevice = context.getPhysicalDevice();
        const auto qfIndex = context.getUniversalQueueFamilyIndex();

        // Get the number of supported counters (counterCount is an output
        // parameter filled by the query call below).
        uint32_t counterCount;
        VK_CHECK(vki.enumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, qfIndex,
                                                                                  &counterCount, NULL, NULL));

        if (!counterCount)
            TCU_THROW(QualityWarning, "There are no performance counters");

        if (m_copyResults && !context.getPerformanceQueryProperties().allowCommandBufferQueryCopies)
            TCU_THROW(NotSupportedError,
                      "VkPhysicalDevicePerformanceQueryPropertiesKHR::allowCommandBufferQueryCopies not supported");
    }

private:
    TestType m_testType;
    VkQueueFlagBits m_queueFlagBits;
    const bool m_copyResults;   // copy results with vkCmdCopyQueryPoolResults instead of reading them
    const uint32_t m_seed;      // seed for randomized counter selection
};
1340
1341 } // namespace
1342
// Group "performance_query"; children are populated in init().
QueryPoolPerformanceTests::QueryPoolPerformanceTests(tcu::TestContext &testCtx)
    : TestCaseGroup(testCtx, "performance_query")
{
}
1347
init(void)1348 void QueryPoolPerformanceTests::init(void)
1349 {
1350
1351 const struct
1352 {
1353 const bool copyResults;
1354 const std::string suffix;
1355 } copyCases[]{
1356 {false, ""},
1357 {true, "_copy"},
1358 };
1359
1360 uint32_t seed = 1692187611u;
1361 for (const auto ©Case : copyCases)
1362 {
1363 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_GRAPHICS_BIT,
1364 copyCase.copyResults, seed++,
1365 "enumerate_and_validate_graphic" + copyCase.suffix));
1366 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_ENUMERATE_AND_VALIDATE, VK_QUEUE_COMPUTE_BIT,
1367 copyCase.copyResults, seed++,
1368 "enumerate_and_validate_compute" + copyCase.suffix));
1369 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_GRAPHICS_BIT, copyCase.copyResults, seed++,
1370 "query_graphic" + copyCase.suffix));
1371 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_QUERY, VK_QUEUE_COMPUTE_BIT, copyCase.copyResults, seed++,
1372 "query_compute" + copyCase.suffix));
1373 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_GRAPHICS_BIT, copyCase.copyResults,
1374 seed++, "multiple_pools_graphic" + copyCase.suffix));
1375 addChild(new QueryPoolPerformanceTest(m_testCtx, TT_MULTIPLE_POOLS, VK_QUEUE_COMPUTE_BIT, copyCase.copyResults,
1376 seed++, "multiple_pools_compute" + copyCase.suffix));
1377 }
1378 }
1379
1380 } // namespace QueryPool
1381 } // namespace vkt
1382