/*
 * Copyright © 2021 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include <perfetto.h>

#include "tu_perfetto.h"
#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_image.h"

#include "util/hash_table.h"
#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"

#include "tu_tracepoints.h"
#include "tu_tracepoints_perfetto.h"

/* We can't include tu_knl.h here, so forward-declare what we need from it: */

int
tu_device_get_gpu_timestamp(struct tu_device *dev,
                            uint64_t *ts);
int
tu_device_get_suspend_count(struct tu_device *dev,
                            uint64_t *suspend_count);
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);

struct u_trace_context *
tu_device_get_u_trace(struct tu_device *device);
/**
 * Queue IDs
 */
enum {
   DEFAULT_HW_QUEUE_ID,
};

/**
 * Render-stage IDs
 */
enum tu_stage_id {
   CMD_BUFFER_STAGE_ID,
   CMD_BUFFER_ANNOTATION_STAGE_ID,
   RENDER_PASS_STAGE_ID,
   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_SYSMEM_STAGE_ID,
   CLEAR_GMEM_STAGE_ID,
   GENERIC_CLEAR_STAGE_ID,
   GMEM_LOAD_STAGE_ID,
   GMEM_STORE_STAGE_ID,
   SYSMEM_RESOLVE_STAGE_ID,
   // TODO add the rest from fd_stage_id
};

static const struct {
   const char *name;
   const char *desc;
} queues[] = {
   [DEFAULT_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
};

static const struct {
   const char *name;
   const char *desc;
} stages[] = {
   [CMD_BUFFER_STAGE_ID] = { "Command Buffer" },
   [CMD_BUFFER_ANNOTATION_STAGE_ID] = { "Annotation", "Command Buffer Annotation" },
   [RENDER_PASS_STAGE_ID] = { "Render Pass" },
   [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID] = { "Annotation", "Render Pass Command Buffer Annotation" },
   [BINNING_STAGE_ID] = { "Binning", "Perform Visibility pass and determine target bins" },
   [GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" },
   [BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" },
   [BLIT_STAGE_ID] = { "Blit", "Performing a Blit operation" },
   [COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
   [CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
   [CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
   [GENERIC_CLEAR_STAGE_ID] = { "Clear Sysmem/Gmem", "" },
   [GMEM_LOAD_STAGE_ID] = { "GMEM Load", "Per tile system memory to GMEM load" },
   [GMEM_STORE_STAGE_ID] = { "GMEM Store", "Per tile GMEM to system memory store" },
   [SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" },
   // TODO add the rest
};

static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync.
 * This will be a *later* timestamp than the first GPU traces (since we
 * capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen). To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;

static uint64_t last_suspend_count;

static uint64_t gpu_max_timestamp;
static uint64_t gpu_timestamp_offset;

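/* Perfetto may clear its incremental state at any point (e.g. when a new
 * trace session starts). was_cleared defaults to true so that the first
 * stage_end() after a clear re-sends the hw-queue and render-stage
 * descriptors via send_descriptors().
 */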
struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};

struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};

class TuRenderpassDataSource : public MesaRenderpassDataSource<TuRenderpassDataSource,
                                                               TuRenderpassTraits> {
   void OnStart(const StartArgs &args) override
   {
      MesaRenderpassDataSource<TuRenderpassDataSource, TuRenderpassTraits>::OnStart(args);

      /* Note: clock_id's below 128 are reserved; for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;

      gpu_timestamp_offset = 0;
      gpu_max_timestamp = 0;
      last_suspend_count = 0;
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);

static void
send_descriptors(TuRenderpassDataSource::TraceContext &ctx)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   /* This must be set before interned data is sent. */
   packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

   packet->set_timestamp(0);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}

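/* Stages are tracked on a small fixed-depth stack in tu_perfetto_state:
 * stage_push() returns a slot for a new nested stage, or NULL (bumping
 * skipped_depth) once nesting exceeds ARRAY_SIZE(p->stages); stage_pop()
 * consumes the skipped levels first so pushes and pops stay balanced.
 */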
static struct tu_perfetto_stage *
stage_push(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (p->stage_depth >= ARRAY_SIZE(p->stages)) {
      p->skipped_depth++;
      return NULL;
   }

   return &p->stages[p->stage_depth++];
}

typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void *);

static struct tu_perfetto_stage *
stage_pop(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (!p->stage_depth)
      return NULL;

   if (p->skipped_depth) {
      p->skipped_depth--;
      return NULL;
   }

   return &p->stages[--p->stage_depth];
}

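/* Called at the start of a stage. The payload is copied here, presumably
 * because the tracepoint's payload does not outlive the call; the copy is
 * freed again in stage_end() once it has been emitted as "extra" event data.
 */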
static void
stage_start(struct tu_device *dev,
            uint64_t ts_ns,
            enum tu_stage_id stage_id,
            const char *app_event,
            const void *payload = nullptr,
            size_t payload_size = 0,
            trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_push(dev);

   if (!stage) {
      PERFETTO_ELOG("stage %d is nested too deep", stage_id);
      return;
   }

   if (payload) {
      void *new_payload = malloc(payload_size);
      if (new_payload)
         memcpy(new_payload, payload, payload_size);
      else
         PERFETTO_ELOG("Failed to allocate payload for stage %d", stage_id);
      payload = new_payload;
   }

   *stage = (struct tu_perfetto_stage) {
      .stage_id = stage_id,
      .stage_iid = 0,
      .start_ts = ts_ns,
      .payload = payload,
      .start_payload_function = (void *) payload_as_extra,
   };

   if (app_event) {
      TuRenderpassDataSource::Trace([=](auto tctx) {
         stage->stage_iid =
            tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
      });
   }
}

static void
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
          const void *flush_data,
          const void *payload = nullptr,
          trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_pop(dev);
   auto trace_flush_data =
      (const struct tu_u_trace_submission_data *) flush_data;
   uint32_t submission_id = trace_flush_data->submission_id;
   uint64_t gpu_ts_offset = trace_flush_data->gpu_ts_offset;

   if (!stage)
      return;
   if (stage->stage_id != stage_id) {
      PERFETTO_ELOG("stage %d ended while stage %d was expected",
                    stage_id, stage->stage_id);
      return;
   }

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!sync_gpu_ts)
      return;

   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx);
         state->was_cleared = false;
      }

      auto packet = tctx.NewTracePacket();

      gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_ts_offset);

      packet->set_timestamp(stage->start_ts + gpu_ts_offset);
      packet->set_timestamp_clock_id(gpu_clock_id);

      auto event = packet->set_gpu_render_stage_event();
      event->set_event_id(0); // ???
      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
      event->set_duration(ts_ns - stage->start_ts);
      if (stage->stage_iid)
         event->set_stage_iid(stage->stage_iid);
      else
         event->set_stage_id(stage->stage_id);
      event->set_context((uintptr_t) dev);
      event->set_submission_id(submission_id);

      if (stage->payload) {
         if (stage->start_payload_function)
            ((trace_payload_as_extra_func) stage->start_payload_function)(
               event, stage->payload);
         free((void *) stage->payload);
      }

      if (payload && payload_as_extra)
         payload_as_extra(event, payload);
   });
}

class TuMemoryDataSource : public perfetto::DataSource<TuMemoryDataSource> {
 public:
   void OnSetup(const SetupArgs &) override
   {
   }

   void OnStart(const StartArgs &) override
   {
      PERFETTO_LOG("Memory tracing started");
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Memory tracing stopped");
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);


#ifdef __cplusplus
extern "C" {
#endif

void
tu_perfetto_init(void)
{
   util_perfetto_init();

   {
      perfetto::DataSourceDescriptor dsd;
#if DETECT_OS_ANDROID
      /* AGI requires this name */
      dsd.set_name("gpu.renderstages");
#else
      dsd.set_name("gpu.renderstages.msm");
#endif
      TuRenderpassDataSource::Register(dsd);
   }

   {
      perfetto::DataSourceDescriptor dsd;
      dsd.set_name("gpu.memory.msm");
      TuMemoryDataSource::Register(dsd);
   }
}

static void
emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
{
   TuRenderpassDataSource::Trace([=](auto tctx) {
      MesaRenderpassDataSource<TuRenderpassDataSource,
                               TuRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
                                                                  gpu_ts, gpu_clock_id);
   });
}

static void
emit_submit_id(uint32_t submission_id)
{
   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_api_event();
      auto submit = event->set_vk_queue_submit();

      submit->set_submission_id(submission_id);
   });
}

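/* Emits a CPU/GPU clock sync and the submission id for a queue submit.
 * Two paths: if the kernel interface already handed us GPU clock values
 * (gpu_clocks != NULL, e.g. on KGSL), reuse them; otherwise sample the GPU
 * timestamp here, rate-limited by next_clock_sync_ns, and fix it up so it
 * stays monotonic across suspend-resume cycles.
 */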
struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev,
                   uint32_t submission_id,
                   struct tu_perfetto_clocks *gpu_clocks)
{
   struct tu_perfetto_clocks clocks {};
   if (gpu_clocks) {
      clocks = *gpu_clocks;
   }

   if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
      return {};

   clocks.cpu = perfetto::base::GetBootTimeNs().count();

   if (gpu_clocks) {
      /* TODO: It would be better to use the CPU time that comes
       * together with the GPU time from KGSL, but it's not
       * equal to GetBootTimeNs.
       */

      clocks.gpu_ts_offset = MAX2(gpu_timestamp_offset, clocks.gpu_ts_offset);
      gpu_timestamp_offset = clocks.gpu_ts_offset;
      sync_gpu_ts = clocks.gpu_ts + clocks.gpu_ts_offset;
   } else {
      clocks.gpu_ts = 0;
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      if (clocks.cpu < next_clock_sync_ns)
         return clocks;

      if (tu_device_get_gpu_timestamp(dev, &clocks.gpu_ts)) {
         PERFETTO_ELOG("Could not sync CPU and GPU clocks");
         return {};
      }

      clocks.gpu_ts = tu_device_ticks_to_ns(dev, clocks.gpu_ts);

      /* Get the CPU timestamp again because tu_device_get_gpu_timestamp
       * can take >100us.
       */
      clocks.cpu = perfetto::base::GetBootTimeNs().count();

      uint64_t current_suspend_count = 0;
      /* If we fail to get it we will use a fallback */
      tu_device_get_suspend_count(dev, &current_suspend_count);

      /* The GPU timestamp is reset after a suspend-resume cycle.
       * Perfetto requires clock snapshots to be monotonic,
       * so we have to fix up the time.
       */
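      /* For example (hypothetical numbers): if the largest absolute GPU
       * timestamp seen before suspend was gpu_max_timestamp = 1000 and the
       * raw counter restarts near 0 after resume, setting
       * gpu_timestamp_offset = 1000 keeps gpu_ts + gpu_ts_offset monotonic
       * across the suspend.
       */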
      if (current_suspend_count != last_suspend_count) {
         gpu_timestamp_offset = gpu_max_timestamp;
         last_suspend_count = current_suspend_count;
      }
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      uint64_t gpu_absolute_ts = clocks.gpu_ts + clocks.gpu_ts_offset;

      /* Fallback check, detect non-monotonic cases which would happen
       * if we cannot retrieve the suspend count.
       */
      if (sync_gpu_ts > gpu_absolute_ts) {
         gpu_absolute_ts += (gpu_max_timestamp - gpu_timestamp_offset);
         gpu_timestamp_offset = gpu_max_timestamp;
         clocks.gpu_ts = gpu_absolute_ts - gpu_timestamp_offset;
      }

      if (sync_gpu_ts > gpu_absolute_ts) {
         PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
         return {};
      }

      gpu_max_timestamp = clocks.gpu_ts;
      sync_gpu_ts = clocks.gpu_ts;
      next_clock_sync_ns = clocks.cpu + 30000000; /* re-sync every ~30 ms */
   }

   emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
   emit_submit_id(submission_id);
   return clocks;
}

/*
 * Trace callbacks, called from u_trace once the timestamps from the GPU have
 * been collected.
 *
 * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
 * and just take the tracepoint's args and add them as name/value pairs in the
 * perfetto events. This file can usually just map a tu_perfetto_* to
 * stage_start/end with a call to that codegenned "extra" func. But you can
 * also provide your own entrypoint and extra funcs if you want to change that
 * mapping.
 */

#define CREATE_EVENT_CALLBACK(event_name, stage_id)                           \
   void tu_perfetto_start_##event_name(                                       \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                 \
      const void *flush_data, const struct trace_start_##event_name *payload, \
      const void *indirect_data)                                              \
   {                                                                          \
      stage_start(                                                            \
         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload),               \
         (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
   }                                                                          \
                                                                              \
   void tu_perfetto_end_##event_name(                                         \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                 \
      const void *flush_data, const struct trace_end_##event_name *payload,   \
      const void *indirect_data)                                              \
   {                                                                          \
      stage_end(                                                              \
         dev, ts_ns, stage_id, flush_data, payload,                           \
         (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name); \
   }

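/* As an illustration, CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID) below
 * expands to roughly:
 *
 *    void tu_perfetto_start_blit(struct tu_device *dev, uint64_t ts_ns,
 *                                uint16_t tp_idx, const void *flush_data,
 *                                const struct trace_start_blit *payload,
 *                                const void *indirect_data)
 *    {
 *       stage_start(dev, ts_ns, BLIT_STAGE_ID, NULL, payload, sizeof(*payload),
 *                   (trace_payload_as_extra_func) &trace_payload_as_extra_start_blit);
 *    }
 *
 * plus the matching tu_perfetto_end_blit() that calls stage_end().
 */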
CREATE_EVENT_CALLBACK(cmd_buffer, CMD_BUFFER_STAGE_ID)
CREATE_EVENT_CALLBACK(render_pass, RENDER_PASS_STAGE_ID)
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)

void
tu_perfetto_start_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* Pass the payload string as the app_event, which will appear right on
    * the event block, rather than as metadata inside; no extra func is
    * necessary.
    */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
               sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* The payload string was already passed as the app_event at stage_start,
    * so no extra func is needed here either.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
             payload, NULL);
}

void
tu_perfetto_start_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* Pass the payload string as the app_event, which will appear right on
    * the event block, rather than as metadata inside; no extra func is
    * necessary.
    */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
               payload->str, payload, sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* The payload string was already passed as the app_event at stage_start,
    * so no extra func is needed here either.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
             flush_data, payload, NULL);
}

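/* Emits a VulkanMemoryEvent for buffer/image create, bind, and destroy, so
 * perfetto's memory tracker can attribute GPU memory. Exactly one of
 * buffer/image must be non-NULL; the address is only reported once a BO is
 * actually bound.
 */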
static void
log_mem(struct tu_device *dev, struct tu_buffer *buffer, struct tu_image *image,
        perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::Operation op)
{
   TuMemoryDataSource::Trace([=](TuMemoryDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_memory_event();

      event->set_timestamp(perfetto::base::GetBootTimeNs().count());
      event->set_operation(op);
      event->set_pid(getpid());

      if (buffer) {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_BUFFER);
         event->set_memory_size(buffer->vk.size);
         if (buffer->bo)
            event->set_memory_address(buffer->iova);
      } else {
         assert(image);
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_IMAGE);
         event->set_memory_size(image->layout[0].size);
         if (image->bo)
            event->set_memory_address(image->iova);
      }
   });
}


void
tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, buffer->bo ?
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

void
tu_perfetto_log_create_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, image->bo ?
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

#ifdef __cplusplus
}
#endif