/*
 * Copyright 2019 Google LLC
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv and radv which are:
 * Copyright © 2015 Intel Corporation
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 */

#include "vn_query_pool.h"

#include "venus-protocol/vn_protocol_driver_query_pool.h"

#include "vn_device.h"
#include "vn_feedback.h"
#include "vn_physical_device.h"

/* query pool commands */

VkResult
vn_CreateQueryPool(VkDevice device,
                   const VkQueryPoolCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkQueryPool *pQueryPool)
{
   struct vn_device *dev = vn_device_from_handle(device);
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &dev->base.base.alloc;

   struct vn_query_pool *pool =
      vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pool)
      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);

   pool->allocator = *alloc;
   pool->query_count = pCreateInfo->queryCount;

   simple_mtx_init(&pool->mutex, mtx_plain);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* Occlusion queries write one integer value - the number of samples
       * passed.
       */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      /* Pipeline statistics queries write one integer value for each bit
       * that is enabled in pipelineStatistics when the pool is created, and
       * the statistics values are written in bit order starting from the
       * least significant bit.
       */
      pool->result_array_size =
         util_bitcount(pCreateInfo->pipelineStatistics);
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      /* Timestamp queries write one integer value. */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /* Transform feedback queries write two integers: the first is the
       * number of primitives successfully written to the corresponding
       * transform feedback buffer, and the second is the number of
       * primitives output to the vertex stream, regardless of whether they
       * were successfully captured or not.
       */
      pool->result_array_size = 2;
      break;
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      /* Primitives generated queries write one integer value: the number of
       * primitives output to the vertex stream, regardless of whether
       * transform feedback is active or whether they were successfully
       * captured by transform feedback. This is identical to the second
       * integer of the transform feedback queries if transform feedback is
       * active.
       */
      pool->result_array_size = 1;
      break;
   default:
      unreachable("bad query type");
      break;
   }

   /* Venus has to handle overflow behavior with query feedback to keep
    * consistency between vkCmdCopyQueryPoolResults and
    * vkGetQueryPoolResults. The default query feedback behavior is to wrap
    * on overflow. However, per spec:
    *
    *    If an unsigned integer query’s value overflows the result type, the
    *    value may either wrap or saturate.
    *
    * So we detect the renderer-side driver to align with its
    * implementation-specific behavior.
    */
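   /* For example, when 32-bit results are requested for a counter that has
    * reached 0x100000001, a saturating implementation returns 0xffffffff
    * while a wrapping one returns 0x00000001.
    */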
   switch (dev->physical_device->renderer_driver_id) {
   case VK_DRIVER_ID_ARM_PROPRIETARY:
   case VK_DRIVER_ID_MESA_LLVMPIPE:
   case VK_DRIVER_ID_MESA_TURNIP:
      pool->saturate_on_overflow = true;
      break;
   default:
      break;
   }

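   /* Creation is asynchronous: the pool is initialized locally and
    * vkCreateQueryPool is encoded to the renderer ring without waiting for
    * the renderer to reply.
    */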
   VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
   vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
                              &pool_handle);

   *pQueryPool = pool_handle;

   return VK_SUCCESS;
}

void
vn_DestroyQueryPool(VkDevice device,
                    VkQueryPool queryPool,
                    const VkAllocationCallbacks *pAllocator)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc;

   if (!pool)
      return;

   alloc = pAllocator ? pAllocator : &pool->allocator;

   if (pool->fb_buf)
      vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);

   simple_mtx_destroy(&pool->mutex);

   vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);

   vn_object_base_fini(&pool->base);
   vk_free(alloc, pool);
}

void
vn_ResetQueryPool(VkDevice device,
                  VkQueryPool queryPool,
                  uint32_t firstQuery,
                  uint32_t queryCount)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);

   vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
                             queryCount);
   if (pool->fb_buf) {
      /* Feedback results are always 64-bit and are followed by a 64-bit
       * availability value.
       */
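      /* e.g. a pipeline statistics pool with three enabled counters uses
       * 3 * 8 + 8 = 32 bytes per query slot.
       */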
      const uint32_t slot_size = (pool->result_array_size * 8) + 8;
      const uint32_t offset = slot_size * firstQuery;
      memset(pool->fb_buf->data + offset, 0, slot_size * queryCount);
   }
}

static VkResult
vn_get_query_pool_feedback(struct vn_query_pool *pool,
                           uint32_t firstQuery,
                           uint32_t queryCount,
                           void *pData,
                           VkDeviceSize stride,
                           VkQueryResultFlags flags)
{
   VkResult result = VK_SUCCESS;
   /* Feedback results are always 64-bit and are followed by a 64-bit
    * availability value.
    */
   const uint32_t slot_array_size = pool->result_array_size + 1;
   uint64_t *src = pool->fb_buf->data;
   src += slot_array_size * firstQuery;

   uint32_t dst_index = 0;
   uint32_t src_index = 0;
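   /* stride is in bytes; the spec requires it to be a multiple of 8 when
    * VK_QUERY_RESULT_64_BIT is set and a multiple of 4 otherwise, so the
    * divisions below are exact.
    */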
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint64_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it is available. */
         const uint64_t avail = src[src_index + pool->result_array_size];
         if (avail) {
            memcpy(&dst[dst_index], &src[src_index],
                   pool->result_array_size * sizeof(uint64_t));
         } else {
            result = VK_NOT_READY;
            /* It is valid to return a result of 0 if the partial bit is
             * set.
             */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               memset(&dst[dst_index], 0,
                      pool->result_array_size * sizeof(uint64_t));
            }
         }
         /* Set the availability value if requested. */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   } else {
      uint32_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint32_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it is available, converting down to
          * uint32_t.
          */
         const uint32_t avail =
            (uint32_t)src[src_index + pool->result_array_size];
         if (avail) {
            for (uint32_t j = 0; j < pool->result_array_size; j++) {
               const uint64_t src_val = src[src_index + j];
               dst[dst_index + j] =
                  src_val > UINT32_MAX && pool->saturate_on_overflow
                     ? UINT32_MAX
                     : (uint32_t)src_val;
            }
         } else {
            result = VK_NOT_READY;
            /* It is valid to return a result of 0 if the partial bit is
             * set.
             */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               for (uint32_t j = 0; j < pool->result_array_size; j++)
                  dst[dst_index + j] = 0;
            }
         }
         /* Set the availability value if requested. */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   }
   return result;
}

static void
vn_query_feedback_wait_ready(struct vn_device *dev,
                             struct vn_query_pool *pool,
                             uint32_t first_query,
                             uint32_t query_count)
{
   VN_TRACE_FUNC();

   /* Feedback results are always 64-bit and are followed by a 64-bit
    * availability value.
    */
   const uint32_t step = pool->result_array_size + 1;
   const uint64_t *avail = (uint64_t *)pool->fb_buf->data +
                           first_query * step + pool->result_array_size;

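   /* Busy-wait on each query's availability value; vn_relax provides
    * backoff between polls.
    */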
   struct vn_relax_state relax_state =
      vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY);
   for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) {
      while (!avail[j]) {
         vn_relax(&relax_state);
      }
   }
   vn_relax_fini(&relax_state);
}

VkResult
vn_GetQueryPoolResults(VkDevice device,
                       VkQueryPool queryPool,
                       uint32_t firstQuery,
                       uint32_t queryCount,
                       size_t dataSize,
                       void *pData,
                       VkDeviceSize stride,
                       VkQueryResultFlags flags)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc = &pool->allocator;
   VkResult result;

   const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
   const size_t result_size = pool->result_array_size * result_width;
   const bool result_always_written =
      flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
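   /* With VK_QUERY_RESULT_WAIT_BIT or VK_QUERY_RESULT_PARTIAL_BIT, the
    * renderer writes a result for every requested query, so the results can
    * be copied through without checking availability.
    */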

   /* Use the query feedback buffer when one has been set up. True partial
    * results cannot be snooped from the feedback buffer: an unready query
    * reports a result of 0 when VK_QUERY_RESULT_PARTIAL_BIT is set, which
    * is valid per spec.
    */
   if (pool->fb_buf) {
      /* If the wait bit is set, poll until the queries are ready. */
      if (flags & VK_QUERY_RESULT_WAIT_BIT)
         vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount);

      result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
                                          stride, flags);
      return vn_result(dev->instance, result);
   }

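   /* If availability is not already guaranteed, ask the renderer to append
    * the availability value after each result array so unready queries can
    * be filtered out when unpacking below.
    */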
   VkQueryResultFlags packed_flags = flags;
   size_t packed_stride = result_size;
   if (!result_always_written)
      packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
   if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      packed_stride += result_width;

   const size_t packed_size = packed_stride * queryCount;
   void *packed_data;
   if (result_always_written && packed_stride == stride) {
      packed_data = pData;
   } else {
      packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!packed_data)
         return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   result = vn_call_vkGetQueryPoolResults(
      dev->primary_ring, device, queryPool, firstQuery, queryCount,
      packed_size, packed_data, packed_stride, packed_flags);

   if (packed_data == pData)
      return vn_result(dev->instance, result);

   const size_t copy_size =
      result_size +
      (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
   const void *src = packed_data;
   void *dst = pData;
   if (result == VK_SUCCESS) {
      for (uint32_t i = 0; i < queryCount; i++) {
         memcpy(dst, src, copy_size);
         src += packed_stride;
         dst += stride;
      }
   } else if (result == VK_NOT_READY) {
      assert(!result_always_written &&
             (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
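      /* For queries that are not ready, leave the caller's result values
       * untouched and only zero the availability value when one was
       * requested.
       */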
      if (flags & VK_QUERY_RESULT_64_BIT) {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint64_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint64_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      } else {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint32_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint32_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      }
   }

   vk_free(alloc, packed_data);
   return vn_result(dev->instance, result);
}

VkResult
vn_query_feedback_buffer_init_once(struct vn_device *dev,
                                   struct vn_query_pool *pool)
{
   VkResult result = VK_SUCCESS;

   simple_mtx_lock(&pool->mutex);
   if (pool->fb_buf)
      goto out_unlock;

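   /* The feedback buffer stores, per query, one 64-bit slot for each result
    * value plus one 64-bit availability value.
    */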
   const uint32_t fb_buf_size =
      (pool->result_array_size + 1) * sizeof(uint64_t) * pool->query_count;
   struct vn_feedback_buffer *fb_buf;
   result =
      vn_feedback_buffer_create(dev, fb_buf_size, &pool->allocator, &fb_buf);
   if (result == VK_SUCCESS)
      pool->fb_buf = fb_buf;

out_unlock:
   simple_mtx_unlock(&pool->mutex);
   return result;
}