/*
 * Copyright 2019 Google LLC
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv and radv which are:
 * Copyright © 2015 Intel Corporation
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 */

#include "vn_query_pool.h"

#include "venus-protocol/vn_protocol_driver_query_pool.h"

#include "vn_device.h"
#include "vn_feedback.h"
#include "vn_physical_device.h"

/* query pool commands */

VkResult
vn_CreateQueryPool(VkDevice device,
                   const VkQueryPoolCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkQueryPool *pQueryPool)
{
   struct vn_device *dev = vn_device_from_handle(device);
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &dev->base.base.alloc;

   struct vn_query_pool *pool =
      vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pool)
      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);

   pool->allocator = *alloc;
   pool->query_count = pCreateInfo->queryCount;

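   /* protects lazy feedback buffer creation in
    * vn_query_feedback_buffer_init_once below
    */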
   simple_mtx_init(&pool->mutex, mtx_plain);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
      /*
       * Occlusion queries write one integer value - the number of samples
       * passed.
       */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      /*
       * Pipeline statistics queries write one integer value for each bit that
       * is enabled in the pipelineStatistics when the pool is created, and
       * the statistics values are written in bit order starting from the
       * least significant bit.
       */
      pool->result_array_size =
         util_bitcount(pCreateInfo->pipelineStatistics);
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      /* Timestamp queries write one integer value. */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /*
       * Transform feedback queries write two integers; the first integer is
       * the number of primitives successfully written to the corresponding
       * transform feedback buffer and the second is the number of primitives
       * output to the vertex stream, regardless of whether they were
       * successfully captured or not.
       */
      pool->result_array_size = 2;
      break;
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      /*
       * Primitives generated queries write one integer value; the number of
       * primitives output to the vertex stream, regardless of whether
       * transform feedback is active or not, or whether they were
       * successfully captured by transform feedback or not. This is identical
       * to the second integer of the transform feedback queries if transform
       * feedback is active.
       */
      pool->result_array_size = 1;
      break;
   default:
      unreachable("bad query type");
      break;
   }

   /* Venus has to handle overflow behavior with query feedback to keep
    * consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults.
    * The default query feedback behavior is to wrap on overflow. However, per
    * spec:
    *
    *    If an unsigned integer query’s value overflows the result type, the
    *    value may either wrap or saturate.
    *
    * So we detect the renderer-side driver and align with its
    * implementation-specific behavior.
    */
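   /* For example, a 64-bit count of 0x100000001 narrowed to a 32-bit result
    * reads back as 0x00000001 when wrapping and as 0xFFFFFFFF (UINT32_MAX)
    * when saturating.
    */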
   switch (dev->physical_device->renderer_driver_id) {
   case VK_DRIVER_ID_ARM_PROPRIETARY:
   case VK_DRIVER_ID_MESA_LLVMPIPE:
   case VK_DRIVER_ID_MESA_TURNIP:
      pool->saturate_on_overflow = true;
      break;
   default:
      break;
   }

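   /* The create command is encoded to the ring without waiting for the
    * renderer's reply, so this optimistically returns VK_SUCCESS.
    */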
   VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
   vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
                              &pool_handle);

   *pQueryPool = pool_handle;

   return VK_SUCCESS;
}

void
vn_DestroyQueryPool(VkDevice device,
                    VkQueryPool queryPool,
                    const VkAllocationCallbacks *pAllocator)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc;

   if (!pool)
      return;

   alloc = pAllocator ? pAllocator : &pool->allocator;

   if (pool->fb_buf)
      vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);

   simple_mtx_destroy(&pool->mutex);

   vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);

   vn_object_base_fini(&pool->base);
   vk_free(alloc, pool);
}

void
vn_ResetQueryPool(VkDevice device,
                  VkQueryPool queryPool,
                  uint32_t firstQuery,
                  uint32_t queryCount)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);

   vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
                             queryCount);
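   /* Also zero the feedback slots so their availability words read back as
    * unavailable until the queries are written again.
    */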
   if (pool->fb_buf) {
      /* Feedback results are always 64 bit and include availability bit
       * (also 64 bit)
       */
      const uint32_t slot_size = (pool->result_array_size * 8) + 8;
      const uint32_t offset = slot_size * firstQuery;
      memset(pool->fb_buf->data + offset, 0, slot_size * queryCount);
   }
}

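/* Read query results back from the feedback buffer.
 *
 * Each query occupies a slot of (result_array_size + 1) uint64_t: the result
 * values first, then a 64-bit availability word. E.g. with
 * result_array_size == 2:
 *
 *    [q0 res0][q0 res1][q0 avail][q1 res0][q1 res1][q1 avail]...
 */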
static VkResult
vn_get_query_pool_feedback(struct vn_query_pool *pool,
                           uint32_t firstQuery,
                           uint32_t queryCount,
                           void *pData,
                           VkDeviceSize stride,
                           VkQueryResultFlags flags)
{
   VkResult result = VK_SUCCESS;
   /* Feedback results are always 64 bit and include availability bit
    * (also 64 bit)
    */
   const uint32_t slot_array_size = pool->result_array_size + 1;
   uint64_t *src = pool->fb_buf->data;
   src += slot_array_size * firstQuery;

   uint32_t dst_index = 0;
   uint32_t src_index = 0;
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint64_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it's available */
         const uint64_t avail = src[src_index + pool->result_array_size];
         if (avail) {
            memcpy(&dst[dst_index], &src[src_index],
                   pool->result_array_size * sizeof(uint64_t));
         } else {
            result = VK_NOT_READY;
            /* valid to return a result of 0 if the partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               memset(&dst[dst_index], 0,
                      pool->result_array_size * sizeof(uint64_t));
            }
         }
         /* Set the availability bit if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   } else {
      uint32_t *dst = pData;
      uint32_t index_stride = stride / sizeof(uint32_t);
      for (uint32_t i = 0; i < queryCount; i++) {
         /* Copy the result if it's available, converting down to uint32_t */
         const uint32_t avail =
            (uint32_t)src[src_index + pool->result_array_size];
         if (avail) {
            for (uint32_t j = 0; j < pool->result_array_size; j++) {
               const uint64_t src_val = src[src_index + j];
               dst[dst_index + j] =
                  src_val > UINT32_MAX && pool->saturate_on_overflow
                     ? UINT32_MAX
                     : (uint32_t)src_val;
            }
         } else {
            result = VK_NOT_READY;
            /* valid to return a result of 0 if the partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               for (uint32_t j = 0; j < pool->result_array_size; j++)
                  dst[dst_index + j] = 0;
            }
         }
         /* Set the availability bit if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[dst_index + pool->result_array_size] = avail;

         dst_index += index_stride;
         src_index += slot_array_size;
      }
   }
   return result;
}

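/* Spin until the availability word of every query in the range becomes
 * non-zero, backing off between polls via vn_relax.
 */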
static void
vn_query_feedback_wait_ready(struct vn_device *dev,
                             struct vn_query_pool *pool,
                             uint32_t first_query,
                             uint32_t query_count)
{
   VN_TRACE_FUNC();

   /* Feedback results are always 64 bit and include availability bit
    * (also 64 bit)
    */
   const uint32_t step = pool->result_array_size + 1;
   const uint64_t *avail = (uint64_t *)pool->fb_buf->data +
                           first_query * step + pool->result_array_size;

   struct vn_relax_state relax_state =
      vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY);
   for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) {
      while (!avail[j]) {
         vn_relax(&relax_state);
      }
   }
   vn_relax_fini(&relax_state);
}

VkResult
vn_GetQueryPoolResults(VkDevice device,
                       VkQueryPool queryPool,
                       uint32_t firstQuery,
                       uint32_t queryCount,
                       size_t dataSize,
                       void *pData,
                       VkDeviceSize stride,
                       VkQueryResultFlags flags)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc = &pool->allocator;
   VkResult result;

   const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
   const size_t result_size = pool->result_array_size * result_width;
   const bool result_always_written =
      flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);

   /* Get results from the feedback buffer.
    * Not possible for VK_QUERY_RESULT_PARTIAL_BIT.
    */
   if (pool->fb_buf) {
      /* If the wait bit is set, poll until the queries become available */
      if (flags & VK_QUERY_RESULT_WAIT_BIT)
         vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount);

      result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
                                          stride, flags);
      return vn_result(dev->instance, result);
   }

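   /* Have the renderer write the results tightly packed. Unless WAIT or
    * PARTIAL guarantees that every result gets written, also request the
    * availability values so unavailable queries can be told apart from
    * available ones when unpacking below.
    */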
   VkQueryResultFlags packed_flags = flags;
   size_t packed_stride = result_size;
   if (!result_always_written)
      packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
   if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      packed_stride += result_width;

   const size_t packed_size = packed_stride * queryCount;
   void *packed_data;
   if (result_always_written && packed_stride == stride) {
      packed_data = pData;
   } else {
      packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!packed_data)
         return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   result = vn_call_vkGetQueryPoolResults(
      dev->primary_ring, device, queryPool, firstQuery, queryCount,
      packed_size, packed_data, packed_stride, packed_flags);

   if (packed_data == pData)
      return vn_result(dev->instance, result);

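   /* Unpack from the tightly packed layout into the caller's stride. */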
   const size_t copy_size =
      result_size +
      (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
   const void *src = packed_data;
   void *dst = pData;
   if (result == VK_SUCCESS) {
      for (uint32_t i = 0; i < queryCount; i++) {
         memcpy(dst, src, copy_size);
         src += packed_stride;
         dst += stride;
      }
   } else if (result == VK_NOT_READY) {
      assert(!result_always_written &&
             (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
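      /* Copy only the queries whose availability value is set; for the rest,
       * write back an availability value of 0 when requested and leave the
       * result values untouched.
       */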
      if (flags & VK_QUERY_RESULT_64_BIT) {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint64_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint64_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      } else {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint32_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint32_t *)(dst + result_size) = 0;

            src += packed_stride;
            dst += stride;
         }
      }
   }

   vk_free(alloc, packed_data);
   return vn_result(dev->instance, result);
}

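/* Lazily create the feedback buffer backing this pool. Each query needs
 * (result_array_size + 1) uint64_t: the result values plus the availability
 * word. pool->mutex serializes racing initializers.
 */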
VkResult
vn_query_feedback_buffer_init_once(struct vn_device *dev,
                                   struct vn_query_pool *pool)
{
   VkResult result = VK_SUCCESS;

   simple_mtx_lock(&pool->mutex);
   if (pool->fb_buf)
      goto out_unlock;

   const uint32_t fb_buf_size =
      (pool->result_array_size + 1) * sizeof(uint64_t) * pool->query_count;
   struct vn_feedback_buffer *fb_buf;
   result =
      vn_feedback_buffer_create(dev, fb_buf_size, &pool->allocator, &fb_buf);
   if (result == VK_SUCCESS)
      pool->fb_buf = fb_buf;

out_unlock:
   simple_mtx_unlock(&pool->mutex);
   return result;
}
391