xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_query.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * 'pvr_write_query_to_buffer()' and 'pvr_wait_for_available()' based on anv:
 * Copyright © 2015 Intel Corporation
 */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_device_info.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "vk_log.h"
#include "vk_object.h"

VkResult pvr_CreateQueryPool(VkDevice _device,
                             const VkQueryPoolCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkQueryPool *pQueryPool)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   const uint32_t query_size = pCreateInfo->queryCount * sizeof(uint32_t);
   struct pvr_query_pool *pool;
   uint64_t alloc_size;
   VkResult result;

   /* Vulkan 1.0 supports only occlusion, timestamp, and pipeline statistics
    * queries.
    * We don't currently support timestamp queries:
    * VkQueueFamilyProperties->timestampValidBits = 0.
    * We don't currently support pipeline statistics queries:
    * VkPhysicalDeviceFeatures->pipelineStatisticsQuery = false.
    */
   assert(!device->vk.enabled_features.pipelineStatisticsQuery);
   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);

   pool = vk_object_alloc(&device->vk,
                          pAllocator,
                          sizeof(*pool),
                          VK_OBJECT_TYPE_QUERY_POOL);
   if (!pool)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

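   /* Each core gets its own slice of the result buffer, so round the query
    * data size up to the ISP occlusion query base address alignment to get
    * the per-core stride.
    */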
   pool->result_stride =
      ALIGN_POT(query_size, PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT));

   pool->query_count = pCreateInfo->queryCount;

   /* Each Phantom writes to a separate offset within the vis test heap so
    * allocate space for the total number of Phantoms.
    */
   alloc_size = (uint64_t)pool->result_stride * core_count;

   result = pvr_bo_suballoc(&device->suballoc_vis_test,
                            alloc_size,
                            PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT),
                            false,
                            &pool->result_buffer);
   if (result != VK_SUCCESS)
      goto err_free_pool;

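   /* One 32-bit availability word per query: zeroed on reset and written by
    * the device once a query's result is ready.
    */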
   result = pvr_bo_suballoc(&device->suballoc_general,
                            query_size,
                            sizeof(uint32_t),
                            false,
                            &pool->availability_buffer);
   if (result != VK_SUCCESS)
      goto err_free_result_buffer;

   *pQueryPool = pvr_query_pool_to_handle(pool);

   return VK_SUCCESS;

err_free_result_buffer:
   pvr_bo_suballoc_free(pool->result_buffer);

err_free_pool:
   vk_object_free(&device->vk, pAllocator, pool);

   return result;
}

void pvr_DestroyQueryPool(VkDevice _device,
                          VkQueryPool queryPool,
                          const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!pool)
      return;

   pvr_bo_suballoc_free(pool->availability_buffer);
   pvr_bo_suballoc_free(pool->result_buffer);

   vk_object_free(&device->vk, pAllocator, pool);
}

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usages of this function where the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
static inline bool pvr_query_is_available(const struct pvr_query_pool *pool,
                                          uint32_t query_idx)
{
   volatile uint32_t *available =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);
   return !!available[query_idx];
}

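/* How long vkGetQueryPoolResults() will poll for a query with
 * VK_QUERY_RESULT_WAIT_BIT before treating the device as lost, in seconds.
 */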
#define NSEC_PER_SEC UINT64_C(1000000000)
#define PVR_WAIT_TIMEOUT UINT64_C(5)

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usages of this function where the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
/* TODO: Handle device loss scenario properly. */
static VkResult pvr_wait_for_available(struct pvr_device *device,
                                       const struct pvr_query_pool *pool,
                                       uint32_t query_idx)
{
   const uint64_t abs_timeout =
      os_time_get_absolute_timeout(PVR_WAIT_TIMEOUT * NSEC_PER_SEC);

   /* From the Vulkan 1.0 spec:
    *
    *    Commands that wait indefinitely for device execution (namely
    *    vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences or
    *    vkAcquireNextImageKHR with a maximum timeout, and
    *    vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT bit set in
    *    flags) must return in finite time even in the case of a lost device,
    *    and return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    */
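   /* Poll the availability word until the device sets it or the timeout
    * expires; expiry is reported as a lost device (see the TODO above).
    */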
   while (os_time_get_nano() < abs_timeout) {
      if (pvr_query_is_available(pool, query_idx) != 0)
         return VK_SUCCESS;
   }

   return vk_error(device, VK_ERROR_DEVICE_LOST);
}

#undef NSEC_PER_SEC
#undef PVR_WAIT_TIMEOUT

static inline void pvr_write_query_to_buffer(uint8_t *buffer,
                                             VkQueryResultFlags flags,
                                             uint32_t idx,
                                             uint64_t value)
{
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *query_data = (uint64_t *)buffer;
      query_data[idx] = value;
   } else {
      uint32_t *query_data = (uint32_t *)buffer;
      query_data[idx] = value;
   }
}

VkResult pvr_GetQueryPoolResults(VkDevice _device,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 size_t dataSize,
                                 void *pData,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   VG(volatile uint32_t *available =
         pvr_bo_suballoc_get_map_addr(pool->availability_buffer));
   volatile uint32_t *query_results =
      pvr_bo_suballoc_get_map_addr(pool->result_buffer);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   uint8_t *data = (uint8_t *)pData;
   VkResult result = VK_SUCCESS;

   /* TODO: Instead of making the memory defined here for valgrind, to better
    * catch out of bounds accesses and other memory errors we should move this
    * to where the query buffers are changed by the driver or device (e.g.
    * "vkCmdResetQueryPool()", "vkGetQueryPoolResults()", etc.).
    */

   VG(VALGRIND_MAKE_MEM_DEFINED(&available[firstQuery],
                                queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_DEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   for (uint32_t i = 0; i < queryCount; i++) {
      bool is_available = pvr_query_is_available(pool, firstQuery + i);
      uint64_t count = 0;

      if (flags & VK_QUERY_RESULT_WAIT_BIT && !is_available) {
         result = pvr_wait_for_available(device, pool, firstQuery + i);
         if (result != VK_SUCCESS)
            return result;

         is_available = true;
      }

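      /* Each core writes its visibility count to its own slice of the result
       * buffer, so the value reported for a query is the sum across all
       * cores.
       */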
      for (uint32_t j = 0; j < core_count; j++)
         count += query_results[pool->result_stride * j + firstQuery + i];

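      /* An unavailable query without VK_QUERY_RESULT_PARTIAL_BIT leaves its
       * result slot untouched and makes the whole call return VK_NOT_READY.
       */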
      if (is_available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
         pvr_write_query_to_buffer(data, flags, 0, count);
      else
         result = VK_NOT_READY;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         pvr_write_query_to_buffer(data, flags, 1, is_available);

      data += stride;
   }

   VG(VALGRIND_MAKE_MEM_UNDEFINED(&available[firstQuery],
                                  queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_UNDEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   return result;
}

void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                           VkQueryPool queryPool,
                           uint32_t firstQuery,
                           uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

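   /* The reset happens on the device: record a query program that clears the
    * requested queries when the command buffer executes.
    */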
   query_info.type = PVR_QUERY_TYPE_RESET_QUERY_POOL;

   query_info.reset_query_pool.query_pool = queryPool;
   query_info.reset_query_pool.first_query = firstQuery;
   query_info.reset_query_pool.query_count = queryCount;

   pvr_add_query_program(cmd_buffer, &query_info);
}

void pvr_ResetQueryPool(VkDevice _device,
                        VkQueryPool queryPool,
                        uint32_t firstQuery,
                        uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   uint32_t *availability =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);

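   /* Host-side reset: clear the availability words so the queries read back
    * as unavailable until the device writes them again.
    */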
   memset(availability + firstQuery, 0, sizeof(uint32_t) * queryCount);
}

void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 VkBuffer dstBuffer,
                                 VkDeviceSize dstOffset,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   query_info.type = PVR_QUERY_TYPE_COPY_QUERY_RESULTS;

   query_info.copy_query_results.query_pool = queryPool;
   query_info.copy_query_results.first_query = firstQuery;
   query_info.copy_query_results.query_count = queryCount;
   query_info.copy_query_results.dst_buffer = dstBuffer;
   query_info.copy_query_results.dst_offset = dstOffset;
   query_info.copy_query_results.stride = stride;
   query_info.copy_query_results.flags = flags;

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

   /* The Vulkan 1.3.231 spec says:
    *
    *    "vkCmdCopyQueryPoolResults is considered to be a transfer operation,
    *    and its writes to buffer memory must be synchronized using
    *    VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT before
    *    using the results."
    */
   /* We record barrier event sub commands to synchronize the compute job used
    * for the copy query results program with transfer jobs, so that no
    * transfer job overlaps the compute job.
    */

   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
      },
   };

   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   pvr_add_query_program(cmd_buffer, &query_info);

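   /* Record the reverse barrier so that subsequent transfer operations wait
    * for the copy query results program to complete.
    */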
   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
      },
   };
}

void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query,
                       VkQueryControlFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Occlusion queries can't be nested. */
   assert(!state->vis_test_enabled);

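   /* A graphics sub-command can only use one query pool's visibility buffer,
    * so beginning a query from a different pool than the one already in use
    * forces the current render to be kicked and a new one started.
    */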
   if (state->current_sub_cmd) {
      assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

      if (!state->current_sub_cmd->gfx.query_pool) {
         state->current_sub_cmd->gfx.query_pool = pool;
      } else if (state->current_sub_cmd->gfx.query_pool != pool) {
         VkResult result;

         /* Kick render. */
         state->current_sub_cmd->gfx.barrier_store = true;

         result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
         if (result != VK_SUCCESS)
            return;

         result =
            pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
         if (result != VK_SUCCESS)
            return;

         /* Use existing render setup, but load color attachments from HW
          * BGOBJ.
          */
         state->current_sub_cmd->gfx.barrier_load = true;
         state->current_sub_cmd->gfx.barrier_store = false;
         state->current_sub_cmd->gfx.query_pool = pool;
      }
   }

   state->query_pool = pool;
   state->vis_test_enabled = true;
   state->vis_reg = query;
   state->dirty.vis_test = true;

   /* Add the index to the list for this render. */
   util_dynarray_append(&state->query_indices, __typeof__(query), query);
}

void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
                     VkQueryPool queryPool,
                     uint32_t query)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   state->vis_test_enabled = false;
   state->dirty.vis_test = true;
}