1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27
28 #include "anv_private.h"
29 #include "vk_util.h"
30
31 #include "perf/intel_perf.h"
32 #include "perf/intel_perf_mdapi.h"
33
34 #include "util/mesa-sha1.h"
35
36 void
anv_physical_device_init_perf(struct anv_physical_device * device,int fd)37 anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
38 {
39 const struct intel_device_info *devinfo = &device->info;
40
41 /* We need self modifying batches. The i915 parser prevents it on
42 * Gfx7.5 :( maybe one day.
43 */
44 if (devinfo->ver < 8)
45 return;
46
47 struct intel_perf_config *perf = intel_perf_new(NULL);
48
49 intel_perf_init_metrics(perf, &device->info, fd,
50 false /* pipeline statistics */,
51 true /* register snapshots */);
52
53 if (!perf->n_queries)
54 goto err;
55
56 /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
57 * perf revision 2.
58 */
59 if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
60 if (!intel_perf_has_hold_preemption(perf))
61 goto err;
62 }
63
64 device->perf = perf;
65
66 /* Compute the number of commands we need to implement a performance
67 * query.
68 */
69 const struct intel_perf_query_field_layout *layout = &perf->query_layout;
70 device->n_perf_query_commands = 0;
71 for (uint32_t f = 0; f < layout->n_fields; f++) {
72 struct intel_perf_query_field *field = &layout->fields[f];
73
74 switch (field->type) {
75 case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
76 device->n_perf_query_commands++;
77 break;
78 case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
79 case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
80 case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
81 case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
82 case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
83 device->n_perf_query_commands += field->size / 4;
84 break;
85 default:
86 unreachable("Unhandled register type");
87 }
88 }
89 device->n_perf_query_commands *= 2; /* Begin & End */
90 device->n_perf_query_commands += 1; /* availability */
91
92 return;
93
94 err:
95 intel_perf_free(perf);
96 }
97
98 void
anv_device_perf_init(struct anv_device * device)99 anv_device_perf_init(struct anv_device *device)
100 {
101 device->perf_fd = -1;
102 }
103
104 static int
anv_device_perf_open(struct anv_device * device,uint64_t metric_id)105 anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
106 {
107 uint64_t period_exponent = 31; /* slowest sampling period */
108
109 return intel_perf_stream_open(device->physical->perf, device->fd,
110 device->context_id, metric_id,
111 period_exponent, true, true);
112 }
113
114 /* VK_INTEL_performance_query */
anv_InitializePerformanceApiINTEL(VkDevice _device,const VkInitializePerformanceApiInfoINTEL * pInitializeInfo)115 VkResult anv_InitializePerformanceApiINTEL(
116 VkDevice _device,
117 const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
118 {
119 ANV_FROM_HANDLE(anv_device, device, _device);
120
121 if (!device->physical->perf)
122 return VK_ERROR_EXTENSION_NOT_PRESENT;
123
124 /* Not much to do here */
125 return VK_SUCCESS;
126 }
127
anv_GetPerformanceParameterINTEL(VkDevice _device,VkPerformanceParameterTypeINTEL parameter,VkPerformanceValueINTEL * pValue)128 VkResult anv_GetPerformanceParameterINTEL(
129 VkDevice _device,
130 VkPerformanceParameterTypeINTEL parameter,
131 VkPerformanceValueINTEL* pValue)
132 {
133 ANV_FROM_HANDLE(anv_device, device, _device);
134
135 if (!device->physical->perf)
136 return VK_ERROR_EXTENSION_NOT_PRESENT;
137
138 VkResult result = VK_SUCCESS;
139 switch (parameter) {
140 case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
141 pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
142 pValue->data.valueBool = VK_TRUE;
143 break;
144
145 case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
146 pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
147 pValue->data.value32 = 25;
148 break;
149
150 default:
151 result = VK_ERROR_FEATURE_NOT_PRESENT;
152 break;
153 }
154
155 return result;
156 }
157
anv_CmdSetPerformanceMarkerINTEL(VkCommandBuffer commandBuffer,const VkPerformanceMarkerInfoINTEL * pMarkerInfo)158 VkResult anv_CmdSetPerformanceMarkerINTEL(
159 VkCommandBuffer commandBuffer,
160 const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
161 {
162 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
163
164 cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
165
166 return VK_SUCCESS;
167 }
168
anv_AcquirePerformanceConfigurationINTEL(VkDevice _device,const VkPerformanceConfigurationAcquireInfoINTEL * pAcquireInfo,VkPerformanceConfigurationINTEL * pConfiguration)169 VkResult anv_AcquirePerformanceConfigurationINTEL(
170 VkDevice _device,
171 const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
172 VkPerformanceConfigurationINTEL* pConfiguration)
173 {
174 ANV_FROM_HANDLE(anv_device, device, _device);
175 struct anv_performance_configuration_intel *config;
176
177 config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
178 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
179 if (!config)
180 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181
182 if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
183 config->register_config =
184 intel_perf_load_configuration(device->physical->perf, device->fd,
185 INTEL_PERF_QUERY_GUID_MDAPI);
186 if (!config->register_config) {
187 vk_object_free(&device->vk, NULL, config);
188 return VK_INCOMPLETE;
189 }
190
191 uint64_t ret =
192 intel_perf_store_configuration(device->physical->perf, device->fd,
193 config->register_config, NULL /* guid */);
194 if (ret == 0) {
195 ralloc_free(config->register_config);
196 vk_object_free(&device->vk, NULL, config);
197 return VK_INCOMPLETE;
198 }
199
200 config->config_id = ret;
201 }
202
203 *pConfiguration = anv_performance_configuration_intel_to_handle(config);
204
205 return VK_SUCCESS;
206 }
207
anv_ReleasePerformanceConfigurationINTEL(VkDevice _device,VkPerformanceConfigurationINTEL _configuration)208 VkResult anv_ReleasePerformanceConfigurationINTEL(
209 VkDevice _device,
210 VkPerformanceConfigurationINTEL _configuration)
211 {
212 ANV_FROM_HANDLE(anv_device, device, _device);
213 ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
214
215 if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
216 intel_perf_remove_configuration(device->physical->perf, device->fd, config->config_id);
217
218 ralloc_free(config->register_config);
219
220 vk_object_free(&device->vk, NULL, config);
221
222 return VK_SUCCESS;
223 }
224
anv_QueueSetPerformanceConfigurationINTEL(VkQueue _queue,VkPerformanceConfigurationINTEL _configuration)225 VkResult anv_QueueSetPerformanceConfigurationINTEL(
226 VkQueue _queue,
227 VkPerformanceConfigurationINTEL _configuration)
228 {
229 ANV_FROM_HANDLE(anv_queue, queue, _queue);
230 ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
231 struct anv_device *device = queue->device;
232
233 if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
234 if (device->perf_fd < 0) {
235 device->perf_fd = anv_device_perf_open(device, config->config_id);
236 if (device->perf_fd < 0)
237 return VK_ERROR_INITIALIZATION_FAILED;
238 } else {
239 int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
240 device->perf_fd,
241 config->config_id);
242 if (ret < 0)
243 return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
244 }
245 }
246
247 return VK_SUCCESS;
248 }
249
/**
 * vkUninitializePerformanceApiINTEL entry point.
 *
 * Closes the device's perf stream if one is open and resets perf_fd to -1.
 */
void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   /* No stream open: nothing to tear down. */
   if (device->perf_fd < 0)
      return;

   close(device->perf_fd);
   device->perf_fd = -1;
}
260
261 /* VK_KHR_performance_query */
262 static const VkPerformanceCounterUnitKHR
263 intel_perf_counter_unit_to_vk_unit[] = {
264 [INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
265 [INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
266 [INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
267 [INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
268 [INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
269 [INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
270 [INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
271 [INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
272 [INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
273 [INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
274 [INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
275 [INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
276 [INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
277 [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
278 [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
279 [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
280 [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
281 };
282
283 static const VkPerformanceCounterStorageKHR
284 intel_perf_counter_data_type_to_vk_storage[] = {
285 [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
286 [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
287 [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
288 [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
289 [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
290 };
291
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(VkPhysicalDevice physicalDevice,uint32_t queueFamilyIndex,uint32_t * pCounterCount,VkPerformanceCounterKHR * pCounters,VkPerformanceCounterDescriptionKHR * pCounterDescriptions)292 VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
293 VkPhysicalDevice physicalDevice,
294 uint32_t queueFamilyIndex,
295 uint32_t* pCounterCount,
296 VkPerformanceCounterKHR* pCounters,
297 VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
298 {
299 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
300 struct intel_perf_config *perf = pdevice->perf;
301
302 uint32_t desc_count = *pCounterCount;
303
304 VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
305 VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
306 pCounterDescriptions, &desc_count);
307
308 /* We cannot support performance queries on anything other than RCS,
309 * because the MI_REPORT_PERF_COUNT command is not available on other
310 * engines.
311 */
312 struct anv_queue_family *queue_family =
313 &pdevice->queue.families[queueFamilyIndex];
314 if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
315 return vk_outarray_status(&out);
316
317 for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
318 const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
319
320 vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
321 counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
322 counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
323 counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
324
325 unsigned char sha1_result[20];
326 _mesa_sha1_compute(intel_counter->symbol_name,
327 strlen(intel_counter->symbol_name),
328 sha1_result);
329 memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
330 }
331
332 vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
333 desc->flags = 0; /* None so far. */
334 snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
335 snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
336 snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
337 }
338 }
339
340 return vk_outarray_status(&out);
341 }
342
/**
 * vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR entry point.
 *
 * Writes to *pNumPasses the value intel_perf_get_n_passes() returns for the
 * requested counter indices, or 0 when the physical device has no perf
 * config.
 */
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (perf != NULL) {
      *pNumPasses =
         intel_perf_get_n_passes(perf,
                                 pPerformanceQueryCreateInfo->pCounterIndices,
                                 pPerformanceQueryCreateInfo->counterIndexCount,
                                 NULL);
   } else {
      *pNumPasses = 0;
   }
}
361
anv_AcquireProfilingLockKHR(VkDevice _device,const VkAcquireProfilingLockInfoKHR * pInfo)362 VkResult anv_AcquireProfilingLockKHR(
363 VkDevice _device,
364 const VkAcquireProfilingLockInfoKHR* pInfo)
365 {
366 ANV_FROM_HANDLE(anv_device, device, _device);
367 struct intel_perf_config *perf = device->physical->perf;
368 struct intel_perf_query_info *first_metric_set = &perf->queries[0];
369 int fd = -1;
370
371 assert(device->perf_fd == -1);
372
373 if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
374 fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
375 if (fd < 0)
376 return VK_TIMEOUT;
377 }
378
379 device->perf_fd = fd;
380 return VK_SUCCESS;
381 }
382
/**
 * vkReleaseProfilingLockKHR entry point.
 *
 * Closes the device's perf stream (expected to be open, asserted) unless
 * DEBUG_NO_OACONFIG is set, and in every case resets perf_fd to -1.
 */
void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const bool stream_in_use = !INTEL_DEBUG(DEBUG_NO_OACONFIG);

   if (stream_in_use) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }

   device->perf_fd = -1;
}
394
395 void
anv_perf_write_pass_results(struct intel_perf_config * perf,struct anv_query_pool * pool,uint32_t pass,const struct intel_perf_query_result * accumulated_results,union VkPerformanceCounterResultKHR * results)396 anv_perf_write_pass_results(struct intel_perf_config *perf,
397 struct anv_query_pool *pool, uint32_t pass,
398 const struct intel_perf_query_result *accumulated_results,
399 union VkPerformanceCounterResultKHR *results)
400 {
401 const struct intel_perf_query_info *query = pool->pass_query[pass];
402
403 for (uint32_t c = 0; c < pool->n_counters; c++) {
404 const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
405
406 if (counter_pass->query != query)
407 continue;
408
409 switch (pool->pass_query[pass]->kind) {
410 case INTEL_PERF_QUERY_TYPE_PIPELINE: {
411 assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
412 uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
413 results[c].uint64 = accumulated_results->accumulator[accu_offset];
414 break;
415 }
416
417 case INTEL_PERF_QUERY_TYPE_OA:
418 case INTEL_PERF_QUERY_TYPE_RAW:
419 switch (counter_pass->counter->data_type) {
420 case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
421 results[c].uint64 =
422 counter_pass->counter->oa_counter_read_uint64(perf,
423 counter_pass->query,
424 accumulated_results);
425 break;
426 case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
427 results[c].float32 =
428 counter_pass->counter->oa_counter_read_float(perf,
429 counter_pass->query,
430 accumulated_results);
431 break;
432 default:
433 /* So far we aren't using uint32, double or bool32... */
434 unreachable("unexpected counter data type");
435 }
436 break;
437
438 default:
439 unreachable("invalid query type");
440 }
441
442 /* The Vulkan extension only has nanoseconds as a unit */
443 if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
444 assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
445 results[c].uint64 *= 1000;
446 }
447 }
448 }
449