/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"

#include "util/mesa-sha1.h"

void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   /* We need self-modifying batches. The i915 command parser prevents that
    * on Gfx7.5 :( maybe one day.
    */
   if (devinfo->ver < 8)
      return;

   struct intel_perf_config *perf = intel_perf_new(NULL);

   intel_perf_init_metrics(perf, &device->info, fd,
                           false /* pipeline statistics */,
                           true /* register snapshots */);

   if (!perf->n_queries)
      goto err;

   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
    * perf revision 2.
    */
   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (!intel_perf_has_hold_preemption(perf))
         goto err;
   }

   device->perf = perf;

   /* Compute the number of commands we need to implement a performance
    * query.
    */
   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
   device->n_perf_query_commands = 0;
   for (uint32_t f = 0; f < layout->n_fields; f++) {
      struct intel_perf_query_field *field = &layout->fields[f];

      switch (field->type) {
      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
         device->n_perf_query_commands++;
         break;
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
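         /* One MI_STORE_REGISTER_MEM per 32-bit register in the field. */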
         device->n_perf_query_commands += field->size / 4;
         break;
      default:
         unreachable("Unhandled register type");
      }
   }
   device->n_perf_query_commands *= 2; /* Begin & End */
   device->n_perf_query_commands += 1; /* availability */

   return;

err:
   intel_perf_free(perf);
}

void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

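/* Open an i915-perf stream on this device's context for the given metrics
 * configuration. The slowest sampling period is requested because query
 * results are captured with MI_REPORT_PERF_COUNT / MI_STORE_REGISTER_MEM in
 * the command stream rather than read back from periodic OA samples.
 */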
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   uint64_t period_exponent = 31; /* slowest sampling period */

   return intel_perf_stream_open(device->physical->perf, device->fd,
                                 device->context_id, metric_id,
                                 period_exponent, true, true);
}

/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice                                    _device,
    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

VkResult anv_GetPerformanceParameterINTEL(
    VkDevice                                    _device,
    VkPerformanceParameterTypeINTEL             parameter,
    VkPerformanceValueINTEL*                    pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer                             commandBuffer,
    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL*            pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_performance_configuration_intel *config;

   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
   if (!config)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      config->register_config =
         intel_perf_load_configuration(device->physical->perf, device->fd,
                                       INTEL_PERF_QUERY_GUID_MDAPI);
      if (!config->register_config) {
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      uint64_t ret =
         intel_perf_store_configuration(device->physical->perf, device->fd,
                                        config->register_config, NULL /* guid */);
      if (ret == 0) {
         ralloc_free(config->register_config);
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      config->config_id = ret;
   }

   *pConfiguration = anv_performance_configuration_intel_to_handle(config);

   return VK_SUCCESS;
}

VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
      intel_perf_remove_configuration(device->physical->perf, device->fd, config->config_id);

   ralloc_free(config->register_config);

   vk_object_free(&device->vk, NULL, config);

   return VK_SUCCESS;
}

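/* The first configuration applied opens the device's i915-perf stream;
 * subsequent calls only switch the metric set on the already open stream.
 */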
VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue                                     _queue,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
   struct anv_device *device = queue->device;

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, config->config_id);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
                                                    device->perf_fd,
                                                    config->config_id);
         if (ret < 0)
            return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */
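/* Mappings from intel_perf counter units / data types to their
 * VK_KHR_performance_query equivalents. Units with no direct Vulkan
 * counterpart are exposed as GENERIC.
 */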
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
   [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    uint32_t*                                   pCounterCount,
    VkPerformanceCounterKHR*                    pCounters,
    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
                          pCounterDescriptions, &desc_count);

   /* We cannot support performance queries on anything other than RCS,
    * because the MI_REPORT_PERF_COUNT command is not available on other
    * engines.
    */
   struct anv_queue_family *queue_family =
      &pdevice->queue.families[queueFamilyIndex];
   if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
      return vk_outarray_status(&out);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;

      vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
         counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(intel_counter->symbol_name,
                            strlen(intel_counter->symbol_name),
                            sha1_result);
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = intel_perf_get_n_passes(perf,
                                         pPerformanceQueryCreateInfo->pCounterIndices,
                                         pPerformanceQueryCreateInfo->counterIndexCount,
                                         NULL);
}

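/* Acquiring the profiling lock opens the i915-perf stream (configuring the
 * OA unit); the stream is kept open until vkReleaseProfilingLockKHR()
 * closes it.
 */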
VkResult anv_AcquireProfilingLockKHR(
    VkDevice                                    _device,
    const VkAcquireProfilingLockInfoKHR*        pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct intel_perf_config *perf = device->physical->perf;
   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

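/* Convert the accumulated HW counter values for the given pass into the
 * VkPerformanceCounterResultKHR layout expected by the application. Only
 * counters belonging to this pass's query are written.
 */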
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct intel_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   const struct intel_perf_query_info *query = pool->pass_query[pass];

   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->query != query)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case INTEL_PERF_QUERY_TYPE_OA:
      case INTEL_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results);
            break;
         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a unit */
      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}
449