/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <string.h>

#include <executorch/runtime/platform/assert.h>
#include <executorch/runtime/platform/platform.h>
#include <executorch/runtime/platform/profiler.h>
#include <inttypes.h>

namespace executorch {
namespace runtime {

namespace {
static uint8_t prof_buf[prof_buf_size * MAX_PROFILE_BLOCKS];
// Base pointer for header
static prof_header_t* prof_header =
    (prof_header_t*)((uintptr_t)prof_buf + prof_header_offset);
// Base pointer for profiling entries
static prof_event_t* prof_arr =
    (prof_event_t*)((uintptr_t)prof_buf + prof_events_offset);
// Base pointer for memory allocator info array
static prof_allocator_t* mem_allocator_arr =
    (prof_allocator_t*)((uintptr_t)prof_buf + prof_mem_alloc_info_offset);
// Base pointer for memory profiling entries
static mem_prof_event_t* mem_prof_arr =
    (mem_prof_event_t*)((uintptr_t)prof_buf + prof_mem_alloc_events_offset);

static uint32_t num_blocks = 0;
static bool prof_stats_dumped = false;
prof_state_t profile_state_tls{-1, 0u};
} // namespace

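// Accessors for the profiling state used to tag each event with the chain
// index and instruction index it belongs to (see begin_profiling below).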
const prof_state_t& get_profile_tls_state() {
  return profile_state_tls;
}

void set_profile_tls_state(const prof_state_t& state) {
  profile_state_tls = state;
}

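// Scoped override of the profiling state, restoring the previous state on
// destruction. A minimal usage sketch (the index values are hypothetical):
//
//   {
//     ExecutorchProfilerInstructionScope scope({/*chain_idx=*/0,
//                                               /*instruction_idx=*/3u});
//     // Events recorded here carry chain_idx 0 and instruction_idx 3.
//   } // The previous state is restored here.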
ExecutorchProfilerInstructionScope::ExecutorchProfilerInstructionScope(
    const prof_state_t& state)
    : old_state_(get_profile_tls_state()) {
  set_profile_tls_state(state);
}

ExecutorchProfilerInstructionScope::~ExecutorchProfilerInstructionScope() {
  set_profile_tls_state(old_state_);
}

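// Token-based profiling API. A minimal usage sketch ("my_op" is a
// hypothetical event name; note that only the name pointer is stored here,
// so the string must stay alive until dump_profile_stats() copies it):
//
//   uint32_t tok = begin_profiling("my_op");
//   // ... code under measurement ...
//   end_profiling(tok);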
uint32_t begin_profiling(const char* name) {
  ET_CHECK_MSG(
      prof_header->prof_entries < MAX_PROFILE_EVENTS,
      "Out of profiling buffer space. Increase MAX_PROFILE_EVENTS and re-compile.");
  uint32_t curr_counter = prof_header->prof_entries;
  prof_header->prof_entries++;
  prof_arr[curr_counter].end_time = 0;
  prof_arr[curr_counter].name_str = name;
  prof_state_t state = get_profile_tls_state();
  prof_arr[curr_counter].chain_idx = state.chain_idx;
  prof_arr[curr_counter].instruction_idx = state.instruction_idx;
  // Set the start time last so that none of this function's own overhead
  // is captured in the measurement.
  prof_arr[curr_counter].start_time = et_pal_current_ticks();
  return curr_counter;
}

void end_profiling(uint32_t token_id) {
  ET_CHECK_MSG(token_id < MAX_PROFILE_EVENTS, "Invalid token id.");
  prof_arr[token_id].end_time = et_pal_current_ticks();
}

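// Hands out the raw profiling buffer. A sketch of a typical caller (how the
// bytes are persisted afterwards is up to the caller and purely
// illustrative here):
//
//   prof_result_t result;
//   dump_profile_stats(&result);
//   // result.prof_data now points to result.num_bytes of data covering
//   // result.num_blocks profiling blocks, ready to be written out.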
void dump_profile_stats(prof_result_t* prof_result) {
  prof_result->prof_data = (uint8_t*)prof_buf;
  prof_result->num_bytes = num_blocks * prof_buf_size;
  prof_result->num_blocks = num_blocks;

  if (!prof_stats_dumped) {
    for (size_t i = 0; i < num_blocks; i++) {
      prof_header_t* prof_header_local =
          (prof_header_t*)(prof_buf + prof_buf_size * i);
      prof_event_t* prof_event_local =
          (prof_event_t*)(prof_buf + prof_buf_size * i + prof_events_offset);
      // Copy the string names into the space allocated in prof_event_t. We
      // avoided doing this earlier to keep the overhead in begin_profiling
      // and end_profiling as low as possible.
      for (size_t j = 0; j < prof_header_local->prof_entries; j++) {
        size_t str_len = strlen(prof_event_local[j].name_str);
        const char* str_ptr = prof_event_local[j].name_str;
        memset(prof_event_local[j].name, 0, PROF_NAME_MAX_LEN);
        if (str_len > PROF_NAME_MAX_LEN) {
          memcpy(prof_event_local[j].name, str_ptr, PROF_NAME_MAX_LEN);
        } else {
          memcpy(prof_event_local[j].name, str_ptr, str_len);
        }
      }
    }
  }

  prof_stats_dumped = true;
}

void reset_profile_stats() {
  prof_stats_dumped = false;
  prof_header->prof_entries = 0;
  prof_header->allocator_entries = 0;
  prof_header->mem_prof_entries = 0;
}

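// The allocator-tracking entry points below are meant to be used as a pair:
// register an allocator once to obtain an id, then log each allocation
// against that id. A minimal sketch (the names are hypothetical):
//
//   uint32_t pool_id = track_allocator("activation_pool");
//   track_allocation(pool_id, /*size=*/1024);
//
// Passing an id of -1 to track_allocation turns it into a no-op.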
void track_allocation(int32_t id, uint32_t size) {
  if (id == -1) {
    return;
  }
  ET_CHECK_MSG(
      prof_header->mem_prof_entries < MAX_MEM_PROFILE_EVENTS,
      "Out of memory profiling buffer space; all %" PRIu32
      " entries are used. Increase MAX_MEM_PROFILE_EVENTS and re-compile.",
      prof_header->mem_prof_entries);
  mem_prof_arr[prof_header->mem_prof_entries].allocator_id = id;
  mem_prof_arr[prof_header->mem_prof_entries].allocation_size = size;
  prof_header->mem_prof_entries++;
}

uint32_t track_allocator(const char* name) {
  ET_CHECK_MSG(
      prof_header->allocator_entries < MEM_PROFILE_MAX_ALLOCATORS,
      "Out of allocator tracking space; all %" PRIu32
      " slots are used. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile.",
      prof_header->allocator_entries);
  size_t str_len = strlen(name);
  size_t num_allocators = prof_header->allocator_entries;
  memset(mem_allocator_arr[num_allocators].name, 0, PROF_NAME_MAX_LEN);
  if (str_len > PROF_NAME_MAX_LEN) {
    memcpy(mem_allocator_arr[num_allocators].name, name, PROF_NAME_MAX_LEN);
  } else {
    memcpy(mem_allocator_arr[num_allocators].name, name, str_len);
  }
  mem_allocator_arr[num_allocators].allocator_id = num_allocators;
  return prof_header->allocator_entries++;
}

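// Starts a fresh profiling block so that events from separate phases or
// runs can be kept apart in post-processing. A minimal sketch (the block
// name is hypothetical):
//
//   profiling_create_block("inference_run_1");
//   // ... all events recorded from here land in this block ...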
void profiling_create_block(const char* name) {
  // If the current profiling block is still unused, keep using it;
  // otherwise move on to the next block.
  if (prof_header->prof_entries != 0 || prof_header->mem_prof_entries != 0 ||
      prof_header->allocator_entries != 0 || num_blocks == 0) {
    num_blocks += 1;
    ET_CHECK_MSG(
        num_blocks <= MAX_PROFILE_BLOCKS,
        "Only %d blocks are supported, but %d have been requested. Increase MAX_PROFILE_BLOCKS and re-compile.",
        MAX_PROFILE_BLOCKS,
        num_blocks);
  }

  // Copy over the name of this profiling block.
  size_t str_len =
      strlen(name) >= PROF_NAME_MAX_LEN ? PROF_NAME_MAX_LEN : strlen(name);
  uintptr_t base = (uintptr_t)prof_buf + (num_blocks - 1) * prof_buf_size;
  prof_header = (prof_header_t*)(base + prof_header_offset);
  memset(prof_header->name, 0, PROF_NAME_MAX_LEN);
  memcpy(prof_header->name, name, str_len);

  // Set the profiler version for compatibility checks in the
  // post-processing tool.
  prof_header->prof_ver = ET_PROF_VER;
  // Set the maximum number of entries that this block can support.
  prof_header->max_prof_entries = MAX_PROFILE_EVENTS;
  prof_header->max_allocator_entries = MEM_PROFILE_MAX_ALLOCATORS;
  prof_header->max_mem_prof_entries = MAX_MEM_PROFILE_EVENTS;
  reset_profile_stats();

  // Set the base addresses for the various profiling entry arrays.
  prof_arr = (prof_event_t*)(base + prof_events_offset);
  mem_allocator_arr = (prof_allocator_t*)(base + prof_mem_alloc_info_offset);
  mem_prof_arr = (mem_prof_event_t*)(base + prof_mem_alloc_events_offset);
}

void profiler_init(void) {
  profiling_create_block("default");
}

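// RAII convenience wrapper around begin_profiling()/end_profiling(). A
// minimal usage sketch ("my_op" is a hypothetical event name):
//
//   {
//     ExecutorchProfiler profiler("my_op");
//     // ... code under measurement ...
//   } // end_profiling() runs in the destructor here.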
ExecutorchProfiler::ExecutorchProfiler(const char* name) {
  prof_tok = begin_profiling(name);
}

ExecutorchProfiler::~ExecutorchProfiler() {
  end_profiling(prof_tok);
}

} // namespace runtime
} // namespace executorch