1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * 'pvr_write_query_to_buffer()' and 'pvr_wait_for_available()' based on anv:
24 * Copyright © 2015 Intel Corporation
25 */
26
27 #include <assert.h>
28 #include <stddef.h>
29 #include <stdint.h>
30 #include <string.h>
31 #include <vulkan/vulkan.h>
32
33 #include "pvr_bo.h"
34 #include "pvr_csb.h"
35 #include "pvr_device_info.h"
36 #include "pvr_private.h"
37 #include "util/macros.h"
38 #include "util/os_time.h"
39 #include "vk_log.h"
40 #include "vk_object.h"
41
VkResult pvr_CreateQueryPool(VkDevice _device,
                             const VkQueryPoolCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkQueryPool *pQueryPool)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   const uint32_t query_size = pCreateInfo->queryCount * sizeof(uint32_t);
   struct pvr_query_pool *pool;
   uint64_t alloc_size;
   VkResult result;

   /* Vulkan 1.0 supports only occlusion, timestamp, and pipeline statistics
    * queries.
    * We don't currently support timestamp queries:
    *    VkQueueFamilyProperties->timestampValidBits = 0.
    * We don't currently support pipeline statistics queries:
    *    VkPhysicalDeviceFeatures->pipelineStatisticsQuery = false.
    */
   assert(!device->vk.enabled_features.pipelineStatisticsQuery);
   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);

   pool = vk_object_alloc(&device->vk,
                          pAllocator,
                          sizeof(*pool),
                          VK_OBJECT_TYPE_QUERY_POOL);
   if (!pool)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

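   /* Each core's slice of results must start at the ISP occlusion query base
    * address alignment, so round the per-core result size up accordingly.
    */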
   pool->result_stride =
      ALIGN_POT(query_size, PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT));

   pool->query_count = pCreateInfo->queryCount;

   /* Each Phantom writes to a separate offset within the vis test heap so
    * allocate space for the total number of Phantoms.
    */
   alloc_size = (uint64_t)pool->result_stride * core_count;

   result = pvr_bo_suballoc(&device->suballoc_vis_test,
                            alloc_size,
                            PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT),
                            false,
                            &pool->result_buffer);
   if (result != VK_SUCCESS)
      goto err_free_pool;

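   /* One uint32_t availability flag per query, set by the device once a
    * query's results are available and cleared again on reset.
    */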
   result = pvr_bo_suballoc(&device->suballoc_general,
                            query_size,
                            sizeof(uint32_t),
                            false,
                            &pool->availability_buffer);
   if (result != VK_SUCCESS)
      goto err_free_result_buffer;

   *pQueryPool = pvr_query_pool_to_handle(pool);

   return VK_SUCCESS;

err_free_result_buffer:
   pvr_bo_suballoc_free(pool->result_buffer);

err_free_pool:
   vk_object_free(&device->vk, pAllocator, pool);

   return result;
}

void pvr_DestroyQueryPool(VkDevice _device,
                          VkQueryPool queryPool,
                          const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!pool)
      return;

   pvr_bo_suballoc_free(pool->availability_buffer);
   pvr_bo_suballoc_free(pool->result_buffer);

   vk_object_free(&device->vk, pAllocator, pool);
}

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would hide uses of this function where the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
static inline bool pvr_query_is_available(const struct pvr_query_pool *pool,
                                          uint32_t query_idx)
{
   volatile uint32_t *available =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);

   return !!available[query_idx];
}

#define NSEC_PER_SEC UINT64_C(1000000000)
#define PVR_WAIT_TIMEOUT UINT64_C(5)

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would hide uses of this function where the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
/* TODO: Handle device loss scenario properly. */
static VkResult pvr_wait_for_available(struct pvr_device *device,
                                       const struct pvr_query_pool *pool,
                                       uint32_t query_idx)
{
   const uint64_t abs_timeout =
      os_time_get_absolute_timeout(PVR_WAIT_TIMEOUT * NSEC_PER_SEC);

   /* From the Vulkan 1.0 spec:
    *
    *    Commands that wait indefinitely for device execution (namely
    *    vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences or
    *    vkAcquireNextImageKHR with a maximum timeout, and
    *    vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT bit set in
    *    flags) must return in finite time even in the case of a lost device,
    *    and return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    */
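   /* Busy-wait on the availability flag until it is set or the fixed timeout
    * expires, at which point we report the device as lost.
    */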
   while (os_time_get_nano() < abs_timeout) {
      if (pvr_query_is_available(pool, query_idx) != 0)
         return VK_SUCCESS;
   }

   return vk_error(device, VK_ERROR_DEVICE_LOST);
}

#undef NSEC_PER_SEC
#undef PVR_WAIT_TIMEOUT

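/* Write a single result element into the user buffer, packed as 64-bit or
 * 32-bit depending on VK_QUERY_RESULT_64_BIT.
 */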
static inline void pvr_write_query_to_buffer(uint8_t *buffer,
                                             VkQueryResultFlags flags,
                                             uint32_t idx,
                                             uint64_t value)
{
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *query_data = (uint64_t *)buffer;
      query_data[idx] = value;
   } else {
      uint32_t *query_data = (uint32_t *)buffer;
      query_data[idx] = value;
   }
}

VkResult pvr_GetQueryPoolResults(VkDevice _device,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 size_t dataSize,
                                 void *pData,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   VG(volatile uint32_t *available =
         pvr_bo_suballoc_get_map_addr(pool->availability_buffer));
   volatile uint32_t *query_results =
      pvr_bo_suballoc_get_map_addr(pool->result_buffer);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   uint8_t *data = (uint8_t *)pData;
   VkResult result = VK_SUCCESS;

   /* TODO: Instead of making the memory defined here for Valgrind, we should
    * move these annotations to where the query buffers are changed by the
    * driver or device (e.g. "vkCmdResetQueryPool()", "vkGetQueryPoolResults()",
    * etc.) to better catch out of bounds accesses and other memory errors.
    */
   VG(VALGRIND_MAKE_MEM_DEFINED(&available[firstQuery],
                                queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_DEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   for (uint32_t i = 0; i < queryCount; i++) {
      bool is_available = pvr_query_is_available(pool, firstQuery + i);
      uint64_t count = 0;

      if (flags & VK_QUERY_RESULT_WAIT_BIT && !is_available) {
         result = pvr_wait_for_available(device, pool, firstQuery + i);
         if (result != VK_SUCCESS)
            return result;

         is_available = true;
      }

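      /* Sum the per-Phantom counts; each core writes its partial result at a
       * separate result_stride offset.
       */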
      for (uint32_t j = 0; j < core_count; j++)
         count += query_results[pool->result_stride * j + firstQuery + i];

      if (is_available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
         pvr_write_query_to_buffer(data, flags, 0, count);
      else
         result = VK_NOT_READY;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         pvr_write_query_to_buffer(data, flags, 1, is_available);

      data += stride;
   }

   VG(VALGRIND_MAKE_MEM_UNDEFINED(&available[firstQuery],
                                  queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_UNDEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   return result;
}

void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                           VkQueryPool queryPool,
                           uint32_t firstQuery,
                           uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   query_info.type = PVR_QUERY_TYPE_RESET_QUERY_POOL;

   query_info.reset_query_pool.query_pool = queryPool;
   query_info.reset_query_pool.first_query = firstQuery;
   query_info.reset_query_pool.query_count = queryCount;

   pvr_add_query_program(cmd_buffer, &query_info);
}

void pvr_ResetQueryPool(VkDevice _device,
                        VkQueryPool queryPool,
                        uint32_t firstQuery,
                        uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   uint32_t *availability =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);

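   /* Host-side reset: clear the availability flags so the affected queries
    * read back as unavailable until the device writes them again.
    */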
   memset(availability + firstQuery, 0, sizeof(uint32_t) * queryCount);
}

void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 VkBuffer dstBuffer,
                                 VkDeviceSize dstOffset,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   query_info.type = PVR_QUERY_TYPE_COPY_QUERY_RESULTS;

   query_info.copy_query_results.query_pool = queryPool;
   query_info.copy_query_results.first_query = firstQuery;
   query_info.copy_query_results.query_count = queryCount;
   query_info.copy_query_results.dst_buffer = dstBuffer;
   query_info.copy_query_results.dst_offset = dstOffset;
   query_info.copy_query_results.stride = stride;
   query_info.copy_query_results.flags = flags;

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

   /* The Vulkan 1.3.231 spec says:
    *
    *    "vkCmdCopyQueryPoolResults is considered to be a transfer operation,
    *    and its writes to buffer memory must be synchronized using
    *    VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT before
    *    using the results."
    *
    * We record barrier event sub commands to sync the compute job used for
    * the copy query results program with transfer jobs, so that no transfer
    * job overlaps with the compute job.
    */

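   /* Barrier before the copy: the copy program, running at the occlusion
    * query stage, waits for earlier transfer work to complete.
    */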
   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
      },
   };

   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   pvr_add_query_program(cmd_buffer, &query_info);

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

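   /* Barrier after the copy: subsequent transfer work waits for the copy
    * program at the occlusion query stage to finish.
    */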
   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
      },
   };
}

void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query,
                       VkQueryControlFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Occlusion queries can't be nested. */
   assert(!state->vis_test_enabled);

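   /* A graphics sub command references a single query pool. If a different
    * pool is already in use, end the current render and start a new one that
    * reuses its setup.
    */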
   if (state->current_sub_cmd) {
      assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

      if (!state->current_sub_cmd->gfx.query_pool) {
         state->current_sub_cmd->gfx.query_pool = pool;
      } else if (state->current_sub_cmd->gfx.query_pool != pool) {
         VkResult result;

         /* Kick render. */
         state->current_sub_cmd->gfx.barrier_store = true;

         result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
         if (result != VK_SUCCESS)
            return;

         result =
            pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
         if (result != VK_SUCCESS)
            return;

         /* Use existing render setup, but load color attachments from HW
          * BGOBJ.
          */
         state->current_sub_cmd->gfx.barrier_load = true;
         state->current_sub_cmd->gfx.barrier_store = false;
         state->current_sub_cmd->gfx.query_pool = pool;
      }
   }

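   /* Enable the visibility test for subsequent draws, with this query index
    * selecting the counter to update.
    */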
   state->query_pool = pool;
   state->vis_test_enabled = true;
   state->vis_reg = query;
   state->dirty.vis_test = true;

   /* Add the index to the list for this render. */
   util_dynarray_append(&state->query_indices, __typeof__(query), query);
}

void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
                     VkQueryPool queryPool,
                     uint32_t query)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

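   /* Disable the visibility test so draws recorded after this point are no
    * longer counted towards the query.
    */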
   state->vis_test_enabled = false;
   state->dirty.vis_test = true;
}