/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdio.h>
#include <stdarg.h>

#include "common/intel_gem.h"
#include "perf/intel_perf.h"

#include "util/hash_table.h"
#include "util/u_process.h"

#include "intel_driver_ds.h"
#include "intel_pps_priv.h"
#include "intel_tracepoints.h"

#ifdef HAVE_PERFETTO

#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"

#include "intel_tracepoints_perfetto.h"

/* Just naming stages */
static const struct {
   const char *name;

   /* The perfetto UI requires a parent-child relationship within a row of
    * elements, meaning that all child elements must end within the lifespan
    * of their parent.
    *
    * Some elements like stalls and command buffers follow that relationship,
    * but not all. This tells us in which UI row each element should live.
    */
   enum intel_ds_queue_stage draw_stage;
} intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = {
   /* Order must match the enum! */
   {
      "queue",
      INTEL_DS_QUEUE_STAGE_QUEUE,
   },
   {
      "frame",
      INTEL_DS_QUEUE_STAGE_FRAME,
   },
   {
      "cmd-buffer",
      INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
   },
   {
      "internal-ops",
      INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
   },
   {
      "stall",
      INTEL_DS_QUEUE_STAGE_STALL,
   },
   {
      "compute",
      INTEL_DS_QUEUE_STAGE_COMPUTE,
   },
   {
      "as-build",
      INTEL_DS_QUEUE_STAGE_AS,
   },
   {
      "RT",
      INTEL_DS_QUEUE_STAGE_RT,
   },
   {
      "render-pass",
      INTEL_DS_QUEUE_STAGE_RENDER_PASS,
   },
   {
      "blorp",
      INTEL_DS_QUEUE_STAGE_BLORP,
   },
   {
      "draw",
      INTEL_DS_QUEUE_STAGE_DRAW,
   },
   {
      "draw_mesh",
      INTEL_DS_QUEUE_STAGE_DRAW_MESH,
   },
};

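/* Incremental state for our data source. Perfetto default-constructs this
 * whenever the tracing service clears the incremental state, so was_cleared
 * starting out true is what triggers the re-emission of all the interned
 * descriptors (see send_descriptors() below).
 */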
struct IntelRenderpassIncrementalState {
   bool was_cleared = true;
};

struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = IntelRenderpassIncrementalState;
};

class IntelRenderpassDataSource : public MesaRenderpassDataSource<IntelRenderpassDataSource,
                                                                  IntelRenderpassTraits> {
public:
   /* Make sure we're not losing traces due to lack of shared memory space */
   constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
      perfetto::BufferExhaustedPolicy::kDrop;
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);

using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;

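/* Emit a clock snapshot correlating the GPU timestamp with CLOCK_BOOTTIME,
 * rate-limited to one snapshot per second. Prefers the kernel's correlated
 * CPU/GPU timestamp query and falls back to sampling both clocks from
 * userspace.
 */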
static void
sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx,
               struct intel_ds_device *device)
{
   uint64_t cpu_ts, gpu_ts;

   if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
                                                   device->info.kmd_type,
                                                   INTEL_ENGINE_CLASS_RENDER, 0,
                                                   CLOCK_BOOTTIME,
                                                   &cpu_ts, &gpu_ts, NULL)) {
      cpu_ts = perfetto::base::GetBootTimeNs().count();
      intel_gem_read_render_timestamp(device->fd, device->info.kmd_type,
                                      &gpu_ts);
   }
   gpu_ts = intel_device_info_timebase_scale(&device->info, gpu_ts);

   if (cpu_ts < device->next_clock_sync_ns)
      return;

   PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);

   device->sync_gpu_ts = gpu_ts;
   device->next_clock_sync_ns = cpu_ts + 1000000000ull;

   MesaRenderpassDataSource<IntelRenderpassDataSource, IntelRenderpassTraits>::EmitClockSync(ctx,
      cpu_ts, gpu_ts, device->gpu_clock_id);
}

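/* (Re)emit all the interned descriptors that later trace packets refer to:
 * the graphics context, one GPU specification per queue/stage pair and one
 * per driver tracepoint. Called whenever the tracing service has cleared our
 * incremental state.
 */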
static void
send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
                 struct intel_ds_device *device)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   device->event_id = 0;
   list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
      for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
         queue->stages[s].start_ns[0] = 0;
      }
   }

   {
      auto packet = ctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
      packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
      packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

      auto interned_data = packet->set_interned_data();

      {
         auto desc = interned_data->add_graphics_contexts();
         desc->set_iid(device->iid);
         desc->set_pid(getpid());
         switch (device->api) {
         case INTEL_DS_API_OPENGL:
            desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::OPEN_GL);
            break;
         case INTEL_DS_API_VULKAN:
            desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::VULKAN);
            break;
         default:
            break;
         }
      }

      /* Emit all the IIDs picked at device/queue creation. */
      list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
         for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
            {
               /* We put the stage number in there so that all rows are
                * ordered by intel_ds_queue_stage.
                */
               char name[100];
               snprintf(name, sizeof(name), "%.10s-%s-%u-%s",
                        util_get_process_name(),
                        queue->name, s, intel_queue_stage_desc[s].name);

               auto desc = interned_data->add_gpu_specifications();
               desc->set_iid(queue->stages[s].queue_iid);
               desc->set_name(name);
            }
         }
      }

      for (unsigned i = 0; i < ARRAY_SIZE(intel_tracepoint_names); i++) {
         /* Skip the begin tracepoints; each label represents a pair of
          * begin/end tracepoints.
          */
         if (strstr(intel_tracepoint_names[i], "intel_begin_") != NULL)
            continue;

         auto desc = interned_data->add_gpu_specifications();
         desc->set_iid(device->tracepoint_iids[i]);
         desc->set_name(intel_tracepoint_names[i] + strlen("intel_end_"));
      }
   }

   device->next_clock_sync_ns = 0;
   sync_timestamp(ctx, device);
}

typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void *, const void *);

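/* begin_event()/end_event() keep a small per-stage stack of start timestamps
 * (bounded by ARRAY_SIZE(stage->start_ns)) so that nested begin/end pairs can
 * be emitted as properly nested render stage events; deeper nesting is
 * silently dropped.
 */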
static void
begin_event(struct intel_ds_queue *queue, uint64_t ts_ns,
            enum intel_ds_queue_stage stage_id)
{
   uint32_t level = queue->stages[stage_id].level;
   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!queue->device->sync_gpu_ts) {
      queue->stages[stage_id].start_ns[level] = 0;
      return;
   }

   if (level >= (ARRAY_SIZE(queue->stages[stage_id].start_ns) - 1))
      return;

   queue->stages[stage_id].start_ns[level] = ts_ns;
   queue->stages[stage_id].level++;
}

static void
end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
          enum intel_ds_queue_stage stage_id,
          uint32_t submission_id,
          uint16_t tracepoint_idx,
          const char *app_event,
          const void *payload = nullptr,
          const void *indirect_data = nullptr,
          trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct intel_ds_device *device = queue->device;

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!device->sync_gpu_ts)
      return;

   if (queue->stages[stage_id].level == 0)
      return;

   uint32_t level = --queue->stages[stage_id].level;
   struct intel_ds_stage *stage = &queue->stages[stage_id];
   uint64_t start_ns = stage->start_ns[level];

   if (!start_ns)
      return;

   IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx, queue->device);
         state->was_cleared = false;
      }

      sync_timestamp(tctx, queue->device);

      uint64_t evt_id = device->event_id++;

      /* If this is an application event, we might need to generate a new
       * stage_iid if not already seen. Otherwise, it's a driver event and
       * we use the internal stage_iid.
       */
      uint64_t stage_iid = app_event ?
         tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) :
         device->tracepoint_iids[tracepoint_idx];

      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(start_ns);
      packet->set_timestamp_clock_id(queue->device->gpu_clock_id);

      assert(ts_ns >= start_ns);

      auto event = packet->set_gpu_render_stage_event();
      event->set_gpu_id(queue->device->gpu_id);

      event->set_hw_queue_iid(stage->queue_iid);
      event->set_stage_iid(stage_iid);
      event->set_context(queue->device->iid);
      event->set_event_id(evt_id);
      event->set_duration(ts_ns - start_ns);
      event->set_submission_id(submission_id);

      if ((payload || indirect_data) && payload_as_extra) {
         payload_as_extra(event, payload, indirect_data);
      }
   });

   stage->start_ns[level] = 0;
}

static void
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
                                        const struct trace_intel_end_stall *payload)
{
   char buf[256];

   {
      auto data = event->add_extra_data();
      data->set_name("stall_reason");

      snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s%s%s%s%s%s%s",
               (payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
               (payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
               (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
               (payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
               (payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
               (payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
               (payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
               (payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
               (payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
               (payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
               (payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
               (payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
               (payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
               (payload->flags & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
               (payload->flags & INTEL_DS_END_OF_PIPE_BIT) ? "+eop" : "",
               (payload->flags & INTEL_DS_CCS_CACHE_FLUSH_BIT) ? "+ccs_flush" : "",
               (payload->reason1) ? payload->reason1 : "unknown",
               (payload->reason2) ? "; " : "",
               (payload->reason2) ? payload->reason2 : "",
               (payload->reason3) ? "; " : "",
               (payload->reason3) ? payload->reason3 : "",
               (payload->reason4) ? "; " : "",
               (payload->reason4) ? payload->reason4 : "");

      assert(strlen(buf) > 0);

      data->set_value(buf);
   }
}

#endif /* HAVE_PERFETTO */

#ifdef __cplusplus
extern "C" {
#endif

#ifdef HAVE_PERFETTO

/*
 * Trace callbacks, called from u_trace once the timestamps from the GPU have
 * been collected.
 */

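/* Generate the begin/end u_trace callback pair for a given tracepoint,
 * mapping it onto one of the UI rows (stages) declared above. The payload of
 * the end tracepoint is forwarded so it can be attached to the event as
 * extra data.
 */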
#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage)                        \
   void                                                                      \
   intel_ds_begin_##event_name(struct intel_ds_device *device,               \
                               uint64_t ts_ns,                               \
                               uint16_t tp_idx,                              \
                               const void *flush_data,                       \
                               const struct trace_intel_begin_##event_name *payload, \
                               const void *indirect_data)                    \
   {                                                                         \
      const struct intel_ds_flush_data *flush =                              \
         (const struct intel_ds_flush_data *) flush_data;                    \
      begin_event(flush->queue, ts_ns, stage);                               \
   }                                                                         \
                                                                             \
   void                                                                      \
   intel_ds_end_##event_name(struct intel_ds_device *device,                 \
                             uint64_t ts_ns,                                 \
                             uint16_t tp_idx,                                \
                             const void *flush_data,                         \
                             const struct trace_intel_end_##event_name *payload, \
                             const void *indirect_data)                      \
   {                                                                         \
      const struct intel_ds_flush_data *flush =                              \
         (const struct intel_ds_flush_data *) flush_data;                    \
      end_event(flush->queue, ts_ns, stage, flush->submission_id,            \
                tp_idx, NULL, payload, indirect_data,                        \
                (trace_payload_as_extra_func)                                \
                &trace_payload_as_extra_intel_end_##event_name);             \
   }

CREATE_DUAL_EVENT_CALLBACK(frame, INTEL_DS_QUEUE_STAGE_FRAME)
CREATE_DUAL_EVENT_CALLBACK(batch, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(generate_commands, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy_cb, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(write_buffer_marker, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(rays, INTEL_DS_QUEUE_STAGE_RT)
CREATE_DUAL_EVENT_CALLBACK(as_build, INTEL_DS_QUEUE_STAGE_AS)

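/* The annotation callbacks carry application-generated labels (debug marker
 * strings): they pass payload->str as the app_event so end_event() interns a
 * stage IID for the label instead of using a driver tracepoint IID.
 */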
void
intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device,
                                     uint64_t ts_ns,
                                     uint16_t tp_idx,
                                     const void *flush_data,
                                     const struct trace_intel_begin_cmd_buffer_annotation *payload,
                                     const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_CMD_BUFFER);
}

void
intel_ds_end_cmd_buffer_annotation(struct intel_ds_device *device,
                                   uint64_t ts_ns,
                                   uint16_t tp_idx,
                                   const void *flush_data,
                                   const struct trace_intel_end_cmd_buffer_annotation *payload,
                                   const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
             flush->submission_id, tp_idx, payload->str, NULL, NULL, NULL);
}

void
intel_ds_begin_queue_annotation(struct intel_ds_device *device,
                                uint64_t ts_ns,
                                uint16_t tp_idx,
                                const void *flush_data,
                                const struct trace_intel_begin_queue_annotation *payload,
                                const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_QUEUE);
}

void
intel_ds_end_queue_annotation(struct intel_ds_device *device,
                              uint64_t ts_ns,
                              uint16_t tp_idx,
                              const void *flush_data,
                              const struct trace_intel_end_queue_annotation *payload,
                              const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_QUEUE,
             flush->submission_id, tp_idx, payload->str, NULL, NULL, NULL);
}

void
intel_ds_begin_stall(struct intel_ds_device *device,
                     uint64_t ts_ns,
                     uint16_t tp_idx,
                     const void *flush_data,
                     const struct trace_intel_begin_stall *payload,
                     const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
}

void
intel_ds_end_stall(struct intel_ds_device *device,
                   uint64_t ts_ns,
                   uint16_t tp_idx,
                   const void *flush_data,
                   const struct trace_intel_end_stall *payload,
                   const void *indirect_data)
{
   const struct intel_ds_flush_data *flush =
      (const struct intel_ds_flush_data *) flush_data;
   end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL,
             flush->submission_id, tp_idx, NULL, payload, indirect_data,
             (trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
}

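/* Submission bracketing: intel_ds_begin_submit() samples the CPU boot clock
 * and intel_ds_end_submit() emits a VulkanApiEvent/VkQueueSubmit packet with
 * the measured duration and a fresh submission ID, which the GPU render
 * stage events then reference.
 */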
uint64_t
intel_ds_begin_submit(struct intel_ds_queue *queue)
{
   return perfetto::base::GetBootTimeNs().count();
}

void
intel_ds_end_submit(struct intel_ds_queue *queue,
                    uint64_t start_ts)
{
   if (!u_trace_should_process(&queue->device->trace_context)) {
      queue->device->sync_gpu_ts = 0;
      queue->device->next_clock_sync_ns = 0;
      return;
   }

   uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
   uint32_t submission_id = queue->submission_id++;

   IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx, queue->device);
         state->was_cleared = false;
      }

      sync_timestamp(tctx, queue->device);

      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(start_ts);

      auto event = packet->set_vulkan_api_event();
      auto submit = event->set_vk_queue_submit();

      // submit->set_pid(os_get_pid());
      // submit->set_tid(os_get_tid());
      submit->set_duration_ns(end_ts - start_ts);
      submit->set_vk_queue((uintptr_t) queue);
      submit->set_submission_id(submission_id);
   });
}

#endif /* HAVE_PERFETTO */

static void
intel_driver_ds_init_once(void)
{
#ifdef HAVE_PERFETTO
   util_perfetto_init();
   perfetto::DataSourceDescriptor dsd;
   dsd.set_name("gpu.renderstages.intel");
   IntelRenderpassDataSource::Register(dsd);
#endif
}

static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT;
static uint64_t iid = 1;

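/* Monotonic allocator for the interning IDs used by the descriptors above;
 * starts at 1 so that 0 stays unused.
 */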
static uint64_t get_iid()
{
   return iid++;
}

void
intel_driver_ds_init(void)
{
   call_once(&intel_driver_ds_once_flag,
             intel_driver_ds_init_once);
   intel_gpu_tracepoint_config_variable();
}

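/* Typical driver flow, as a sketch (names are illustrative, exact call sites
 * vary per driver):
 *
 *    intel_ds_device_init(&ds_dev, devinfo, fd, gpu_id, INTEL_DS_API_VULKAN);
 *    queue = intel_ds_device_init_queue(&ds_dev, &ds_queue, "gfx%u", idx);
 *    ...
 *    uint64_t start = intel_ds_begin_submit(queue);
 *    ... submit the batch ...
 *    intel_ds_end_submit(queue, start);
 *    ...
 *    intel_ds_device_fini(&ds_dev);
 */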
void
intel_ds_device_init(struct intel_ds_device *device,
                     const struct intel_device_info *devinfo,
                     int drm_fd,
                     uint32_t gpu_id,
                     enum intel_ds_api api)
{
   memset(device, 0, sizeof(*device));

   device->gpu_id = gpu_id;
   device->gpu_clock_id = intel_pps_clock_id(gpu_id);
   device->fd = drm_fd;
   device->info = *devinfo;
   device->iid = get_iid();
   device->api = api;

#ifdef HAVE_PERFETTO
   assert(ARRAY_SIZE(intel_tracepoint_names) < ARRAY_SIZE(device->tracepoint_iids));
   for (unsigned i = 0; i < ARRAY_SIZE(intel_tracepoint_names); i++)
      device->tracepoint_iids[i] = get_iid();
#endif

   list_inithead(&device->queues);
   simple_mtx_init(&device->trace_context_mutex, mtx_plain);
}

void
intel_ds_device_fini(struct intel_ds_device *device)
{
   u_trace_context_fini(&device->trace_context);
   simple_mtx_destroy(&device->trace_context_mutex);
}

struct intel_ds_queue *
intel_ds_device_init_queue(struct intel_ds_device *device,
                           struct intel_ds_queue *queue,
                           const char *fmt_name,
                           ...)
{
   va_list ap;

   memset(queue, 0, sizeof(*queue));

   queue->device = device;

   va_start(ap, fmt_name);
   vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
   va_end(ap);

   for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
      queue->stages[s].queue_iid = get_iid();
   }

   list_add(&queue->link, &device->queues);

   return queue;
}

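/* Per-submission u_trace data: initialized with the owning queue and
 * submission ID, flushed into the device's trace context under
 * trace_context_mutex, and consumed when intel_ds_device_process() drives
 * u_trace_context_process().
 */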
void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
                              struct intel_ds_queue *queue,
                              uint64_t submission_id)
{
   memset(data, 0, sizeof(*data));

   data->queue = queue;
   data->submission_id = submission_id;

   u_trace_init(&data->trace, &queue->device->trace_context);
}

void intel_ds_flush_data_fini(struct intel_ds_flush_data *data)
{
   u_trace_fini(&data->trace);
}

void intel_ds_queue_flush_data(struct intel_ds_queue *queue,
                               struct u_trace *ut,
                               struct intel_ds_flush_data *data,
                               uint32_t frame_nr,
                               bool free_data)
{
   simple_mtx_lock(&queue->device->trace_context_mutex);
   u_trace_flush(ut, data, frame_nr, free_data);
   simple_mtx_unlock(&queue->device->trace_context_mutex);
}

void intel_ds_device_process(struct intel_ds_device *device,
                             bool eof)
{
   simple_mtx_lock(&device->trace_context_mutex);
   u_trace_context_process(&device->trace_context, eof);
   simple_mtx_unlock(&device->trace_context_mutex);
}

#ifdef __cplusplus
}
#endif