/*
 * Copyright © 2021 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include <perfetto.h>

#include "tu_perfetto.h"
#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_image.h"

#include "util/hash_table.h"
#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"

#include "tu_tracepoints.h"
#include "tu_tracepoints_perfetto.h"

/* we can't include tu_knl.h */

int
tu_device_get_gpu_timestamp(struct tu_device *dev,
                            uint64_t *ts);
int
tu_device_get_suspend_count(struct tu_device *dev,
                            uint64_t *suspend_count);
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);

struct u_trace_context *
tu_device_get_u_trace(struct tu_device *device);

/**
 * Queue-id's
 */
enum {
   DEFAULT_HW_QUEUE_ID,
};

/**
 * Render-stage id's
 */
enum tu_stage_id {
   CMD_BUFFER_STAGE_ID,
   CMD_BUFFER_ANNOTATION_STAGE_ID,
   RENDER_PASS_STAGE_ID,
   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_SYSMEM_STAGE_ID,
   CLEAR_GMEM_STAGE_ID,
   GENERIC_CLEAR_STAGE_ID,
   GMEM_LOAD_STAGE_ID,
   GMEM_STORE_STAGE_ID,
   SYSMEM_RESOLVE_STAGE_ID,
   // TODO add the rest from fd_stage_id
};

static const struct {
   const char *name;
   const char *desc;
} queues[] = {
   [DEFAULT_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
};

static const struct {
   const char *name;
   const char *desc;
} stages[] = {
   [CMD_BUFFER_STAGE_ID]     = { "Command Buffer" },
   [CMD_BUFFER_ANNOTATION_STAGE_ID]     = { "Annotation", "Command Buffer Annotation" },
   [RENDER_PASS_STAGE_ID]    = { "Render Pass" },
   [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID]    = { "Annotation", "Render Pass Command Buffer Annotation" },
   [BINNING_STAGE_ID]        = { "Binning", "Perform Visibility pass and determine target bins" },
   [GMEM_STAGE_ID]           = { "GMEM", "Rendering to GMEM" },
   [BYPASS_STAGE_ID]         = { "Bypass", "Rendering to system memory" },
   [BLIT_STAGE_ID]           = { "Blit", "Performing a Blit operation" },
   [COMPUTE_STAGE_ID]        = { "Compute", "Compute job" },
   [CLEAR_SYSMEM_STAGE_ID]   = { "Clear Sysmem", "" },
   [CLEAR_GMEM_STAGE_ID]     = { "Clear GMEM", "Per-tile (GMEM) clear" },
   [GENERIC_CLEAR_STAGE_ID]  = { "Clear Sysmem/Gmem", ""},
   [GMEM_LOAD_STAGE_ID]      = { "GMEM Load", "Per tile system memory to GMEM load" },
   [GMEM_STORE_STAGE_ID]     = { "GMEM Store", "Per tile GMEM to system memory store" },
   [SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" },
   // TODO add the rest
};

static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync.
 * This will be a *later* timestamp than the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;

static uint64_t last_suspend_count;

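/* Used to keep emitted GPU timestamps monotonic: gpu_max_timestamp tracks
 * the largest GPU timestamp we have seen, and gpu_timestamp_offset is added
 * to raw GPU timestamps after the GPU clock restarts (e.g. across a
 * suspend/resume cycle).
 */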
static uint64_t gpu_max_timestamp;
static uint64_t gpu_timestamp_offset;

struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};

struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};

class TuRenderpassDataSource : public MesaRenderpassDataSource<TuRenderpassDataSource,
                                                               TuRenderpassTraits> {
   void OnStart(const StartArgs &args) override
   {
      MesaRenderpassDataSource<TuRenderpassDataSource, TuRenderpassTraits>::OnStart(args);

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;

      gpu_timestamp_offset = 0;
      gpu_max_timestamp = 0;
      last_suspend_count = 0;
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);

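/* Emit the hw-queue and render-stage descriptors that subsequent
 * GpuRenderStageEvents reference by id.  Re-sent whenever perfetto clears
 * our incremental state.
 */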
static void
send_descriptors(TuRenderpassDataSource::TraceContext &ctx)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   /* This must be set before interned data is sent. */
   packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

   packet->set_timestamp(0);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}

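/* Stages are tracked on a small fixed-size stack in tu_perfetto_state so
 * that a stage_end() can be matched with its stage_start().  Stages nested
 * deeper than the stack are counted in skipped_depth and silently dropped.
 */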
static struct tu_perfetto_stage *
stage_push(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (p->stage_depth >= ARRAY_SIZE(p->stages)) {
      p->skipped_depth++;
      return NULL;
   }

   return &p->stages[p->stage_depth++];
}

typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);

static struct tu_perfetto_stage *
stage_pop(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (!p->stage_depth)
      return NULL;

   if (p->skipped_depth) {
      p->skipped_depth--;
      return NULL;
   }

   return &p->stages[--p->stage_depth];
}

static void
stage_start(struct tu_device *dev,
            uint64_t ts_ns,
            enum tu_stage_id stage_id,
            const char *app_event,
            const void *payload = nullptr,
            size_t payload_size = 0,
            trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_push(dev);

   if (!stage) {
      PERFETTO_ELOG("stage %d is nested too deep", stage_id);
      return;
   }

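   /* Copy the payload: it is only consumed (and freed) later, in stage_end(),
    * once the GPU timestamps have been collected.
    */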
   if (payload) {
      void* new_payload = malloc(payload_size);
      if (new_payload)
         memcpy(new_payload, payload, payload_size);
      else
         PERFETTO_ELOG("Failed to allocate payload for stage %d", stage_id);
      payload = new_payload;
   }

   *stage = (struct tu_perfetto_stage) {
      .stage_id = stage_id,
      .stage_iid = 0,
      .start_ts = ts_ns,
      .payload = payload,
      .start_payload_function = (void *) payload_as_extra,
   };

   if (app_event) {
      TuRenderpassDataSource::Trace([=](auto tctx) {
         stage->stage_iid =
            tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
      });
   }
}

static void
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
          const void *flush_data,
          const void* payload = nullptr,
          trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_pop(dev);
   auto trace_flush_data =
      (const struct tu_u_trace_submission_data *) flush_data;
   uint32_t submission_id = trace_flush_data->submission_id;
   uint64_t gpu_ts_offset = trace_flush_data->gpu_ts_offset;

   if (!stage)
      return;

   if (stage->stage_id != stage_id) {
      PERFETTO_ELOG("stage %d ended while stage %d is expected",
            stage_id, stage->stage_id);
      return;
   }

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!sync_gpu_ts)
      return;

   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
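      /* Re-send the queue/stage descriptors if perfetto has cleared our
       * incremental state.
       */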
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx);
         state->was_cleared = false;
      }

      auto packet = tctx.NewTracePacket();

      gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_ts_offset);

      packet->set_timestamp(stage->start_ts + gpu_ts_offset);
      packet->set_timestamp_clock_id(gpu_clock_id);

      auto event = packet->set_gpu_render_stage_event();
      event->set_event_id(0); // ???
      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
      event->set_duration(ts_ns - stage->start_ts);
      if (stage->stage_iid)
         event->set_stage_iid(stage->stage_iid);
      else
         event->set_stage_id(stage->stage_id);
      event->set_context((uintptr_t) dev);
      event->set_submission_id(submission_id);

      if (stage->payload) {
         if (stage->start_payload_function)
            ((trace_payload_as_extra_func) stage->start_payload_function)(
               event, stage->payload);
         free((void *)stage->payload);
      }

      if (payload && payload_as_extra)
         payload_as_extra(event, payload);
   });
}

class TuMemoryDataSource : public perfetto::DataSource<TuMemoryDataSource> {
 public:
   void OnSetup(const SetupArgs &) override
   {
   }

   void OnStart(const StartArgs &) override
   {
      PERFETTO_LOG("Memory tracing started");
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Memory tracing stopped");
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);


#ifdef __cplusplus
extern "C" {
#endif

void
tu_perfetto_init(void)
{
   util_perfetto_init();

   {
      perfetto::DataSourceDescriptor dsd;
#if DETECT_OS_ANDROID
      /* AGI requires this name */
      dsd.set_name("gpu.renderstages");
#else
      dsd.set_name("gpu.renderstages.msm");
#endif
      TuRenderpassDataSource::Register(dsd);
   }

   {
      perfetto::DataSourceDescriptor dsd;
      dsd.set_name("gpu.memory.msm");
      TuMemoryDataSource::Register(dsd);
   }
}

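/* Emit a clock snapshot pairing a CPU boot-time timestamp with the
 * corresponding (offset-adjusted) GPU timestamp, so perfetto can correlate
 * the two clock domains.
 */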
static void
emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
{
   TuRenderpassDataSource::Trace([=](auto tctx) {
      MesaRenderpassDataSource<TuRenderpassDataSource,
                               TuRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
                                                                  gpu_ts, gpu_clock_id);
   });
}

static void
emit_submit_id(uint32_t submission_id)
{
   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_api_event();
      auto submit = event->set_vk_queue_submit();

      submit->set_submission_id(submission_id);
   });
}

struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev,
                   uint32_t submission_id,
                   struct tu_perfetto_clocks *gpu_clocks)
{
   struct tu_perfetto_clocks clocks {};
   if (gpu_clocks) {
      clocks = *gpu_clocks;
   }

   if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
      return {};

   clocks.cpu = perfetto::base::GetBootTimeNs().count();

   if (gpu_clocks) {
      /* TODO: It would be better to use the CPU time that comes together
       * with the GPU time from the KGSL, but it's not equal to
       * GetBootTimeNs.
       */

      clocks.gpu_ts_offset = MAX2(gpu_timestamp_offset, clocks.gpu_ts_offset);
      gpu_timestamp_offset = clocks.gpu_ts_offset;
      sync_gpu_ts = clocks.gpu_ts + clocks.gpu_ts_offset;
   } else {
      clocks.gpu_ts = 0;
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      if (clocks.cpu < next_clock_sync_ns)
         return clocks;

      if (tu_device_get_gpu_timestamp(dev, &clocks.gpu_ts)) {
         PERFETTO_ELOG("Could not sync CPU and GPU clocks");
         return {};
      }

      clocks.gpu_ts = tu_device_ticks_to_ns(dev, clocks.gpu_ts);

      /* Get the CPU timestamp again because tu_device_get_gpu_timestamp can
       * take >100us.
       */
      clocks.cpu = perfetto::base::GetBootTimeNs().count();

      uint64_t current_suspend_count = 0;
      /* If we fail to get it we will use a fallback */
      tu_device_get_suspend_count(dev, &current_suspend_count);

      /* The GPU timestamp is reset after a suspend-resume cycle.
       * Perfetto requires clock snapshots to be monotonic,
       * so we have to fix up the time.
       */
      if (current_suspend_count != last_suspend_count) {
         gpu_timestamp_offset = gpu_max_timestamp;
         last_suspend_count = current_suspend_count;
      }
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      uint64_t gpu_absolute_ts = clocks.gpu_ts + clocks.gpu_ts_offset;

      /* Fallback check: detect non-monotonic cases, which would happen
       * if we cannot retrieve the suspend count.
       */
      if (sync_gpu_ts > gpu_absolute_ts) {
         gpu_absolute_ts += (gpu_max_timestamp - gpu_timestamp_offset);
         gpu_timestamp_offset = gpu_max_timestamp;
         clocks.gpu_ts = gpu_absolute_ts - gpu_timestamp_offset;
      }

      if (sync_gpu_ts > gpu_absolute_ts) {
         PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
         return {};
      }

      gpu_max_timestamp = clocks.gpu_ts;
      sync_gpu_ts = clocks.gpu_ts;
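      /* Rate-limit clock re-synchronization to roughly once every 30 ms. */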
      next_clock_sync_ns = clocks.cpu + 30000000;
   }

   emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
   emit_submit_id(submission_id);
   return clocks;
}

/*
 * Trace callbacks, called from u_trace once the timestamps from GPU have been
 * collected.
 *
 * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
 * and just take the tracepoint's args and add them as name/value pairs in the
 * perfetto events.  This file can usually just map a tu_perfetto_* to
 * stage_start/end with a call to that codegenned "extra" func.  But you can
 * also provide your own entrypoint and extra funcs if you want to change that
 * mapping.
 */

#define CREATE_EVENT_CALLBACK(event_name, stage_id)                                 \
   void tu_perfetto_start_##event_name(                                             \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                       \
      const void *flush_data, const struct trace_start_##event_name *payload,       \
      const void *indirect_data)                                                    \
   {                                                                                \
      stage_start(                                                                  \
         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload),                     \
         (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
   }                                                                                \
                                                                                    \
   void tu_perfetto_end_##event_name(                                               \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                       \
      const void *flush_data, const struct trace_end_##event_name *payload,         \
      const void *indirect_data)                                                    \
   {                                                                                \
      stage_end(                                                                    \
         dev, ts_ns, stage_id, flush_data, payload,                                 \
         (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name);   \
   }

CREATE_EVENT_CALLBACK(cmd_buffer, CMD_BUFFER_STAGE_ID)
CREATE_EVENT_CALLBACK(render_pass, RENDER_PASS_STAGE_ID)
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)

void
tu_perfetto_start_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* No extra func necessary, the only arg is in the end payload. */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
               sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* Pass the payload string as the app_event, which will appear right on the
    * event block, rather than as metadata inside.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
             payload, NULL);
}

void
tu_perfetto_start_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* No extra func necessary, the only arg is in the end payload. */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
               payload->str, payload, sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* Pass the payload string as the app_event, which will appear right on the
    * event block, rather than as metadata inside.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
             flush_data, payload, NULL);
}


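/* Emit a VulkanMemoryEvent for the given buffer or image (exactly one of
 * the two must be non-NULL) on the gpu.memory.msm data source.
 */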
static void
log_mem(struct tu_device *dev, struct tu_buffer *buffer, struct tu_image *image,
        perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::Operation op)
{
   TuMemoryDataSource::Trace([=](TuMemoryDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_memory_event();

      event->set_timestamp(perfetto::base::GetBootTimeNs().count());
      event->set_operation(op);
      event->set_pid(getpid());

      if (buffer) {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_BUFFER);
         event->set_memory_size(buffer->vk.size);
         if (buffer->bo)
            event->set_memory_address(buffer->iova);
      } else {
         assert(image);
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_IMAGE);
         event->set_memory_size(image->layout[0].size);
         if (image->bo)
            event->set_memory_address(image->iova);
      }

   });
}

void
tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, buffer->bo ?
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

void
tu_perfetto_log_create_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, image->bo ?
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}


#ifdef __cplusplus
}
#endif