1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <signal.h>
20 #include <sys/mman.h>
21 #include <sys/prctl.h>
22 #include <sys/utsname.h>
23 #include <time.h>
24 #include <unistd.h>
25 #include <chrono>
26 #include <filesystem>
27 #include <optional>
28 #include <set>
29 #include <string>
30 #include <unordered_map>
31 #include <unordered_set>
32 #include <vector>
33
34 #include <android-base/file.h>
35 #include <android-base/logging.h>
36 #include <android-base/parseint.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/strings.h>
39 #include <android-base/unique_fd.h>
40
41 #pragma clang diagnostic push
42 #pragma clang diagnostic ignored "-Wunused-parameter"
43 #include <llvm/Support/MemoryBuffer.h>
44 #pragma clang diagnostic pop
45
46 #if defined(__ANDROID__)
47 #include <android-base/properties.h>
48 #endif
49 #include <unwindstack/Error.h>
50
51 #include "BranchListFile.h"
52 #include "CallChainJoiner.h"
53 #include "ETMRecorder.h"
54 #include "IOEventLoop.h"
55 #include "JITDebugReader.h"
56 #include "MapRecordReader.h"
57 #include "OfflineUnwinder.h"
58 #include "ProbeEvents.h"
59 #include "RecordFilter.h"
60 #include "cmd_record_impl.h"
61 #include "command.h"
62 #include "environment.h"
63 #include "event_selection_set.h"
64 #include "event_type.h"
65 #include "kallsyms.h"
66 #include "read_apk.h"
67 #include "read_elf.h"
68 #include "read_symbol_map.h"
69 #include "record.h"
70 #include "record_file.h"
71 #include "thread_tree.h"
72 #include "tracing.h"
73 #include "utils.h"
74 #include "workload.h"
75
76 namespace simpleperf {
77 namespace {
78
79 using android::base::ParseUint;
80 using android::base::Realpath;
81
// Event type added by PrepareRecording() when no event has been selected.
static std::string default_measured_event_type = "cpu-cycles";

// Maps a branch filter name to the PERF_SAMPLE_BRANCH_* flag it selects.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps a clock name to the matching CLOCK_* id.
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};
99
// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
static constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
static constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
static constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * kMegabyte;

// Initial value of RecordCommand::aux_buffer_size_.
static constexpr size_t kDefaultAuxBufferSize = 4 * kMegabyte;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
// So make default interval to 100ms.
static constexpr uint32_t kDefaultEtmDataFlushIntervalInMs = 100;
117
// Timestamps taken at milestones of one record command run. Every field starts at zero.
// RecordCommand::Run() stores GetSystemClock() values into prepare_recording_time and
// start_recording_time; the remaining fields are presumably filled the same way by later
// stages -- confirm against their writers.
struct TimeStat {
  uint64_t prepare_recording_time{0};
  uint64_t start_recording_time{0};
  uint64_t stop_recording_time{0};
  uint64_t finish_recording_time{0};
  uint64_t post_process_time{0};
};
125
GetDefaultRecordBufferSize(bool system_wide_recording)126 std::optional<size_t> GetDefaultRecordBufferSize(bool system_wide_recording) {
127 // Currently, the record buffer size in user-space is set to match the kernel buffer size on a
128 // 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
129 // For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
130 // But on devices with memory >= 4GB, we increase buffer size to 256MB. This reduces the chance
131 // of cutting samples, which can cause broken callchains.
132 static constexpr size_t kLowMemoryRecordBufferSize = 64 * kMegabyte;
133 static constexpr size_t kHighMemoryRecordBufferSize = 256 * kMegabyte;
134 static constexpr size_t kSystemWideRecordBufferSize = 256 * kMegabyte;
135 // Ideally we can use >= 4GB here. But the memory size shown in /proc/meminfo is like to be 3.x GB
136 // on a device with 4GB memory. So we have to use <= 3GB.
137 static constexpr uint64_t kLowMemoryLimit = 3 * kGigabyte;
138
139 if (system_wide_recording) {
140 return kSystemWideRecordBufferSize;
141 }
142 return GetMemorySize() <= kLowMemoryLimit ? kLowMemoryRecordBufferSize
143 : kHighMemoryRecordBufferSize;
144 }
145
// Implementation of the `simpleperf record` command.
//
// Run() drives one recording session: it parses options, optionally re-runs itself in an app's
// context, creates the workload, then calls PrepareRecording(), DoRecording() and
// PostProcessRecording() in that order. The private methods are grouped below by the stage
// they belong to.
class RecordCommand : public Command {
 public:
  // Passes the command name, a one-line description and the long help text to Command, sets
  // the option defaults and ignores SIGPIPE. The help text contains one "%s" placeholder (in
  // the --user-buffer-size entry) which LongHelpString() fills in.
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
            "Usage: simpleperf record [options] [--] [command [command-args]]\n"
            " Gather sampling information of running [command]. And -a/-p/-t option\n"
            " can be used to change target of sampling information.\n"
            " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
            "Select monitored threads:\n"
            "-a System-wide collection. Use with --exclude-perf to exclude samples for\n"
            " simpleperf process.\n"
#if defined(__ANDROID__)
            "--app package_name Profile the process of an Android application.\n"
            " On non-rooted devices, the app must be debuggable,\n"
            " because we use run-as to switch to the app's context.\n"
#endif
            "-p pid_or_process_name_regex1,pid_or_process_name_regex2,...\n"
            " Record events on existing processes. Processes are searched either by pid\n"
            " or process name regex. Mutually exclusive with -a.\n"
            "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
            "\n"
            "Select monitored event types:\n"
            "-e event1[:modifier1],event2[:modifier2],...\n"
            " Select a list of events to record. An event can be:\n"
            " 1) an event name listed in `simpleperf list`;\n"
            " 2) a raw PMU event in rN format. N is a hex number.\n"
            " For example, r1b selects event number 0x1b.\n"
            " 3) a kprobe event added by --kprobe option.\n"
            " Modifiers can be added to define how the event should be\n"
            " monitored. Possible modifiers are:\n"
            " u - monitor user space events only\n"
            " k - monitor kernel space events only\n"
            "--group event1[:modifier],event2[:modifier2],...\n"
            " Similar to -e option. But events specified in the same --group\n"
            " option are monitored as a group, and scheduled in and out at the\n"
            " same time.\n"
            "--trace-offcpu Generate samples when threads are scheduled off cpu.\n"
            " Similar to \"-c 1 -e sched:sched_switch\".\n"
            "--kprobe kprobe_event1,kprobe_event2,...\n"
            " Add kprobe events during recording. The kprobe_event format is in\n"
            " Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
            " 'p:myprobe do_sys_openat2 $arg2:string' - add event kprobes:myprobe\n"
            " 'r:myretprobe do_sys_openat2 $retval:s64' - add event kprobes:myretprobe\n"
            "--add-counter event1,event2,... Add additional event counts in record samples. For example,\n"
            " we can use `-e cpu-cycles --add-counter instructions` to\n"
            " get samples for cpu-cycles event, while having instructions\n"
            " event count for each sample.\n"
            "\n"
            "Select monitoring options:\n"
            "-f freq Set event sample frequency. It means recording at most [freq]\n"
            " samples every second. For non-tracepoint events, the default\n"
            " option is -f 4000. A -f/-c option affects all event types\n"
            " following it until meeting another -f/-c option. For example,\n"
            " for \"-f 1000 -e cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
            " has sample freq 1000, sched:sched_switch event has sample period 1.\n"
            "-c count Set event sample period. It means recording one sample when\n"
            " [count] events happen. For tracepoint events, the default option\n"
            " is -c 1.\n"
            "--call-graph fp | dwarf[,<dump_stack_size>]\n"
            " Enable call graph recording. Use frame pointer or dwarf debug\n"
            " frame as the method to parse call graph in stack.\n"
            " Default is no call graph. Default dump_stack_size with -g is 65528.\n"
            "-g Same as '--call-graph dwarf'.\n"
            "--clockid clock_id Generate timestamps of samples using selected clock.\n"
            " Possible values are: realtime, monotonic,\n"
            " monotonic_raw, boottime, perf. If supported, default\n"
            " is monotonic, otherwise is perf.\n"
            "--cpu cpu_item1,cpu_item2,... Monitor events on selected cpus. cpu_item can be a number like\n"
            " 1, or a range like 0-3. A --cpu option affects all event types\n"
            " following it until meeting another --cpu option.\n"
            "--delay time_in_ms Wait time_in_ms milliseconds before recording samples.\n"
            "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
            " [command]. Here time_in_sec may be any positive\n"
            " floating point number.\n"
            "-j branch_filter1,branch_filter2,...\n"
            " Enable taken branch stack sampling. Each sample captures a series\n"
            " of consecutive taken branches.\n"
            " The following filters are defined:\n"
            " any: any type of branch\n"
            " any_call: any function call or system call\n"
            " any_ret: any function return or system call return\n"
            " ind_call: any indirect branch\n"
            " u: only when the branch target is at the user level\n"
            " k: only when the branch target is in the kernel\n"
            " This option requires at least one branch type among any, any_call,\n"
            " any_ret, ind_call.\n"
            "-b Enable taken branch stack sampling. Same as '-j any'.\n"
            "-m mmap_pages Set pages used in the kernel to cache sample data for each cpu.\n"
            " It should be a power of 2. If not set, the max possible value <= 1024\n"
            " will be used.\n"
            "--user-buffer-size <buffer_size> Set buffer size in userspace to cache sample data.\n"
            " By default, it is %s.\n"
            "--no-inherit Don't record created child threads/processes.\n"
            "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n"
            " percent is in range [1-100], default is 25.\n"
            "\n"
            "--tp-filter filter_string Set filter_string for the previous tracepoint event.\n"
            " Format is in Documentation/trace/events.rst in the kernel.\n"
            " An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
            "\n"
            "Dwarf unwinding options:\n"
            "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
            " stack will be recorded in perf.data and unwound while\n"
            " recording by default. Use --post-unwind=yes to switch\n"
            " to unwind after recording.\n"
            "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
            " will be unwound by default. Use this option to disable the\n"
            " unwinding of the user's stack.\n"
            "--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
            " callchain joiner is used to break the 64k stack limit\n"
            " and build more complete call graphs. However, the built\n"
            " call graphs may not be correct in all cases.\n"
            "--callchain-joiner-min-matching-nodes count\n"
            " When callchain joiner is used, set the matched nodes needed to join\n"
            " callchains. The count should be >= 1. By default it is 1.\n"
            "--no-cut-samples Simpleperf uses a record buffer to cache records received from the kernel.\n"
            " When the available space in the buffer reaches low level, the stack data in\n"
            " samples is truncated to 1KB. When the available space reaches critical level,\n"
            " it drops all samples. This option makes simpleperf not truncate stack data\n"
            " when the available space reaches low level.\n"
            "--keep-failed-unwinding-result Keep reasons for failed unwinding cases\n"
            "--keep-failed-unwinding-debug-info Keep debug info for failed unwinding cases\n"
            "\n"
            "Sample filter options:\n"
            "--exclude-perf Exclude samples for simpleperf process.\n"
            RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
            "\n"
            "Recording file options:\n"
            "--no-dump-build-id Don't dump build ids in perf.data.\n"
            "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
            " kernel symbols will be dumped when needed.\n"
            "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
            " dumped in perf.data, to support reporting in another\n"
            " environment.\n"
            "-o record_file_name Set record file name, default is perf.data.\n"
            "--size-limit SIZE[K|M|G] Stop recording after SIZE bytes of records.\n"
            " Default is unlimited.\n"
            "--symfs <dir> Look for files with symbols relative to this directory.\n"
            " This option is used to provide files with symbol table and\n"
            " debug information, which are used for unwinding and dumping symbols.\n"
            "--add-meta-info key=value Add extra meta info, which will be stored in the recording file.\n"
            "-z[=<compression_level>] Compress records using zstd. compression level: 1 is the fastest,\n"
            " 22 is the greatest, 3 is the default.\n"
            "\n"
            "ETM recording options:\n"
            "--addr-filter filter_str1,filter_str2,...\n"
            " Provide address filters for cs-etm instruction tracing.\n"
            " filter_str accepts below formats:\n"
            " 'filter <addr-range>' -- trace instructions in a range\n"
            " 'start <addr>' -- start tracing when ip is <addr>\n"
            " 'stop <addr>' -- stop tracing when ip is <addr>\n"
            " <addr-range> accepts below formats:\n"
            " <file_path> -- code sections in a binary file\n"
            " <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n"
            " <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n"
            " <addr> accepts below formats:\n"
            " <vaddr>@<file_path> -- virtual addr in a binary file\n"
            " <kernel_addr> -- a kernel address\n"
            " Examples:\n"
            " 'filter 0x456-0x480@/system/lib/libc.so'\n"
            " 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
            "--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n"
            " Need to be power of 2 and page size aligned.\n"
            " Used memory size is (buffer_size * (cpu_count + 1).\n"
            " Default is 4M.\n"
            "--decode-etm Convert ETM data into branch lists while recording.\n"
            "--binary binary_name Used with --decode-etm to only generate data for binaries\n"
            " matching binary_name regex.\n"
            "--record-timestamp Generate timestamp packets in ETM stream.\n"
            "--record-cycles Generate cycle count packets in ETM stream.\n"
            "--cycle-threshold <threshold> Set cycle count counter threshold for ETM cycle count packets.\n"
            "--etm-flush-interval <interval> Set the interval between ETM data flushes from the ETR buffer\n"
            " to the perf event buffer (in milliseconds). Default is 100 ms.\n"
            "\n"
            "Other options:\n"
            "--exit-with-parent Stop recording when the thread starting simpleperf dies.\n"
            "--use-cmd-exit-code Exit with the same exit code as the monitored cmdline.\n"
            "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
            " <fd_no>, then close <fd_no>.\n"
            "--stdio-controls-profiling Use stdin/stdout to pause/resume profiling.\n"
#if defined(__ANDROID__)
            "--in-app We are already running in the app's context.\n"
            "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
#if 0
            // Below options are only used internally and shouldn't be visible to the public.
            "--out-fd <fd> Write perf.data to a file descriptor.\n"
            "--stop-signal-fd <fd> Stop recording when fd is readable.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        sample_record_count_(0),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u),
        last_record_timestamp_(0u),
        record_filter_(thread_tree_) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
  }

  // Returns the long help text with the default user buffer size substituted in.
  std::string LongHelpString() const override;
  // Runs the command. *exit_code is set to 1 on failure; on success it is 0, or the value
  // reported for the workload when use_cmd_exit_code_ is set and a workload was created.
  void Run(const std::vector<std::string>& args, int* exit_code) override;
  // Convenience overload: runs the command and returns true iff the exit code is 0.
  bool Run(const std::vector<std::string>& args) override {
    int exit_code;
    Run(args, &exit_code);
    return exit_code == 0;
  }

 private:
  // Top-level stages, called by Run() in this order (AdjustPerfEventLimit() directly after
  // ParseOptions()).
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
                    ProbeEvents& probe_events);
  bool AdjustPerfEventLimit();
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  // pre recording functions
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename,
                                                     const EventAttrIds& attrs);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpMaps();
  bool DumpAuxTraceInfo();

  // recording functions
  // ProcessRecord() is the callback registered with PrepareToReadMmapEventData() in
  // PrepareRecording().
  bool ProcessRecord(Record* record);
  bool ShouldOmitRecord(Record* record);
  bool DumpMapsForRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);
  bool ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info, bool sync_kernel_records);
  bool ProcessControlCmd(IOEventLoop* loop);
  void UpdateRecord(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool KeepFailedUnwindingResult(const SampleRecord& r, const std::vector<uint64_t>& ips,
                                 const std::vector<uint64_t>& sps);

  // post recording functions
  std::unique_ptr<RecordFileReader> MoveRecordFile(const std::string& old_filename);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature(bool kernel_symbols_available);
  bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
  void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);
  bool DumpETMBranchListFeature();
  bool DumpInitMapFeature();

  // NOTE: the constructor's initializer list follows the declaration order of the members
  // below; keep the two in sync when adding or reordering members.
  bool system_wide_collection_;
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  // When true, PrepareRecording() creates offline_unwinder_ (and callchain_joiner_ if
  // allow_callchain_joiner_ is also true).
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  // Passed to OfflineUnwinder::Create() as its collect_stat argument.
  bool keep_failed_unwinding_result_ = false;
  bool keep_failed_unwinding_debug_info_ = false;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  // When non-zero, events start disabled and PrepareRecording() schedules a one-time event
  // after this many milliseconds to enable them.
  uint64_t delay_in_ms_ = 0;
  // When non-zero, PrepareRecording() schedules the event loop to exit after this many seconds.
  double duration_in_sec_;
  bool dump_build_id_ = true;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  EventSelectionSet event_selection_set_;

  // Passed to MmapEventFiles() as its first two arguments.
  std::pair<size_t, size_t> mmap_page_range_;
  // Record buffer size; when unset, GetDefaultRecordBufferSize() decides.
  std::optional<size_t> user_buffer_size_;
  size_t aux_buffer_size_ = kDefaultAuxBufferSize;

  ThreadTree thread_tree_;
  std::string record_filename_;
  android::base::unique_fd out_fd_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;
  // When valid, the event loop exits once this fd becomes readable.
  android::base::unique_fd stop_signal_fd_;

  uint64_t sample_record_count_;
  // When valid, DoRecording() writes "STARTED" to this fd.
  android::base::unique_fd start_profiling_fd_;
  // When true, fd 0 is watched and handled by ProcessControlCmd().
  bool stdio_controls_profiling_ = false;

  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  // Set in PrepareRecording() from event_selection_set_.ExcludeKernel().
  bool exclude_kernel_callchain_;
  uint64_t size_limit_in_bytes_ = 0;
  uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
  size_t cpu_time_max_percent_ = 25;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
  // Passed to MmapEventFiles() in PrepareRecording().
  bool allow_truncating_samples_ = true;

  std::unique_ptr<JITDebugReader> jit_debug_reader_;
  uint64_t last_record_timestamp_;  // used to insert Mmap2Records for JIT debug info
  TimeStat time_stat_;
  EventAttrWithId dumping_attr_id_;
  // In system wide recording, record if we have dumped map info for a process.
  std::unordered_set<pid_t> dumped_processes_;
  bool exclude_perf_ = false;
  RecordFilter record_filter_;

  std::optional<MapRecordReader> map_record_reader_;
  std::optional<MapRecordThread> map_record_thread_;

  std::unordered_map<std::string, std::string> extra_meta_info_;
  // When true and a workload exists, Run() lets WaitChildProcess() fill in the exit code.
  bool use_cmd_exit_code_ = false;
  // Extra events handed to EventSelectionSet::AddCounters(); PrepareRecording() rejects them
  // unless child_inherit_ is false.
  std::vector<std::string> add_counters_;

  std::unique_ptr<ETMBranchListGenerator> etm_branch_list_generator_;
  std::unique_ptr<RegEx> binary_name_regex_;
  // Period of the ETM flush event added in PrepareRecording().
  std::chrono::milliseconds etm_flush_interval_{kDefaultEtmDataFlushIntervalInMs};

  size_t compression_level_ = 0;
};
490
LongHelpString() const491 std::string RecordCommand::LongHelpString() const {
492 uint64_t process_buffer_size = 0;
493 uint64_t system_wide_buffer_size = 0;
494 if (auto size = GetDefaultRecordBufferSize(false); size) {
495 process_buffer_size = size.value() / kMegabyte;
496 }
497 if (auto size = GetDefaultRecordBufferSize(true); size) {
498 system_wide_buffer_size = size.value() / kMegabyte;
499 }
500 std::string buffer_size_str;
501 if (process_buffer_size == system_wide_buffer_size) {
502 buffer_size_str = android::base::StringPrintf("%" PRIu64 "M", process_buffer_size);
503 } else {
504 buffer_size_str =
505 android::base::StringPrintf("%" PRIu64 "M for process recording and %" PRIu64
506 "M\n for system wide recording",
507 process_buffer_size, system_wide_buffer_size);
508 }
509 return android::base::StringPrintf(long_help_string_.c_str(), buffer_size_str.c_str());
510 }
511
Run(const std::vector<std::string> & args,int * exit_code)512 void RecordCommand::Run(const std::vector<std::string>& args, int* exit_code) {
513 *exit_code = 1;
514 time_stat_.prepare_recording_time = GetSystemClock();
515 ScopedCurrentArch scoped_arch(GetMachineArch());
516
517 if (!CheckPerfEventLimit()) {
518 return;
519 }
520 AllowMoreOpenedFiles();
521
522 std::vector<std::string> workload_args;
523 ProbeEvents probe_events(event_selection_set_);
524 if (!ParseOptions(args, &workload_args, probe_events)) {
525 return;
526 }
527 if (!AdjustPerfEventLimit()) {
528 return;
529 }
530 std::unique_ptr<ScopedTempFiles> scoped_temp_files =
531 ScopedTempFiles::Create(android::base::Dirname(record_filename_));
532 if (!scoped_temp_files) {
533 PLOG(ERROR) << "Can't create output file in directory "
534 << android::base::Dirname(record_filename_);
535 return;
536 }
537 if (!app_package_name_.empty() && !in_app_context_) {
538 // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
539 // it will be impossible when using --app. So don't switch to app's context when we are
540 // root.
541 if (!IsRoot()) {
542 // Running simpleperf in app context doesn't allow running child command. So no need to
543 // consider exit code of child command here.
544 *exit_code = RunInAppContext(app_package_name_, "record", args, workload_args.size(),
545 record_filename_, true)
546 ? 0
547 : 1;
548 return;
549 }
550 }
551 std::unique_ptr<Workload> workload;
552 if (!workload_args.empty()) {
553 workload = Workload::CreateWorkload(workload_args);
554 if (workload == nullptr) {
555 return;
556 }
557 }
558 if (!PrepareRecording(workload.get())) {
559 return;
560 }
561 time_stat_.start_recording_time = GetSystemClock();
562 if (!DoRecording(workload.get()) || !PostProcessRecording(args)) {
563 return;
564 }
565 if (use_cmd_exit_code_ && workload) {
566 workload->WaitChildProcess(false, exit_code);
567 } else {
568 *exit_code = 0;
569 }
570 }
571
PrepareRecording(Workload * workload)572 bool RecordCommand::PrepareRecording(Workload* workload) {
573 // 1. Prepare in other modules.
574 PrepareVdsoFile();
575
576 // 2. Add default event type.
577 if (event_selection_set_.empty()) {
578 std::string event_type = default_measured_event_type;
579 if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64 ||
580 GetTargetArch() == ARCH_RISCV64) {
581 // Emulators may not support hardware events. So switch to cpu-clock when cpu-cycles isn't
582 // available.
583 if (!IsHardwareEventSupported()) {
584 event_type = "cpu-clock";
585 LOG(INFO) << "Hardware events are not available, switch to cpu-clock.";
586 }
587 }
588 if (!event_selection_set_.AddEventType(event_type)) {
589 return false;
590 }
591 }
592
593 // 3. Process options before opening perf event files.
594 exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
595 if (trace_offcpu_ && !TraceOffCpu()) {
596 return false;
597 }
598 if (!add_counters_.empty()) {
599 if (child_inherit_) {
600 LOG(ERROR) << "--no-inherit is needed when using --add-counter.";
601 return false;
602 }
603 if (!event_selection_set_.AddCounters(add_counters_)) {
604 return false;
605 }
606 }
607 if (!SetEventSelectionFlags()) {
608 return false;
609 }
610 if (unwind_dwarf_callchain_) {
611 bool collect_stat = keep_failed_unwinding_result_;
612 offline_unwinder_ = OfflineUnwinder::Create(collect_stat);
613 }
614 if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
615 callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
616 callchain_joiner_min_matching_nodes_, false));
617 }
618
619 // 4. Add monitored targets.
620 bool need_to_check_targets = false;
621 if (system_wide_collection_) {
622 event_selection_set_.AddMonitoredThreads({-1});
623 } else if (!event_selection_set_.HasMonitoredTarget()) {
624 if (workload != nullptr) {
625 event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
626 event_selection_set_.SetEnableCondition(false, true);
627 } else if (!app_package_name_.empty()) {
628 // If app process is not created, wait for it. This allows simpleperf starts before
629 // app process. In this way, we can have a better support of app start-up time profiling.
630 std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
631 event_selection_set_.AddMonitoredProcesses(pids);
632 need_to_check_targets = true;
633 } else {
634 LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
635 return false;
636 }
637 } else {
638 need_to_check_targets = true;
639 }
640 if (delay_in_ms_ != 0 || event_selection_set_.HasAuxTrace()) {
641 event_selection_set_.SetEnableCondition(false, false);
642 }
643
644 // Profiling JITed/interpreted Java code is supported starting from Android P.
645 // Also support profiling art interpreter on host.
646 if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
647 // JIT symfiles are stored in temporary files, and are deleted after recording. But if
648 // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
649 // the debug-unwind cmd.
650 auto symfile_option = (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_)
651 ? JITDebugReader::SymFileOption::kKeepSymFiles
652 : JITDebugReader::SymFileOption::kDropSymFiles;
653 auto sync_option = (clockid_ == "monotonic") ? JITDebugReader::SyncOption::kSyncWithRecords
654 : JITDebugReader::SyncOption::kNoSync;
655 jit_debug_reader_.reset(new JITDebugReader(record_filename_, symfile_option, sync_option));
656 // To profile java code, need to dump maps containing vdex files, which are not executable.
657 event_selection_set_.SetRecordNotExecutableMaps(true);
658 }
659
660 // 5. Open perf event files and create mapped buffers.
661 if (!event_selection_set_.OpenEventFiles()) {
662 return false;
663 }
664 size_t record_buffer_size = 0;
665 if (user_buffer_size_.has_value()) {
666 record_buffer_size = user_buffer_size_.value();
667 } else {
668 auto default_size = GetDefaultRecordBufferSize(system_wide_collection_);
669 if (!default_size.has_value()) {
670 return false;
671 }
672 record_buffer_size = default_size.value();
673 }
674 if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
675 aux_buffer_size_, record_buffer_size,
676 allow_truncating_samples_, exclude_perf_)) {
677 return false;
678 }
679 auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
680 if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
681 return false;
682 }
683
684 // 6. Create perf.data.
685 if (!CreateAndInitRecordFile()) {
686 return false;
687 }
688
689 // 7. Add read/signal/periodic Events.
690 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
691 return false;
692 }
693 IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
694 auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
695 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback, IOEventHighPriority)) {
696 return false;
697 }
698
699 // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
700 if (!SignalIsIgnored(SIGHUP)) {
701 if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback, IOEventHighPriority)) {
702 return false;
703 }
704 }
705 if (stop_signal_fd_ != -1) {
706 if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback, IOEventHighPriority)) {
707 return false;
708 }
709 }
710
711 if (delay_in_ms_ != 0) {
712 auto delay_callback = [this]() {
713 if (!event_selection_set_.SetEnableEvents(true)) {
714 return false;
715 }
716 if (!system_wide_collection_) {
717 // Dump maps in case there are new maps created while delaying.
718 return DumpMaps();
719 }
720 return true;
721 };
722 if (!loop->AddOneTimeEvent(SecondToTimeval(delay_in_ms_ / 1000), delay_callback)) {
723 return false;
724 }
725 }
726 if (duration_in_sec_ != 0) {
727 if (!loop->AddPeriodicEvent(
728 SecondToTimeval(duration_in_sec_), [loop]() { return loop->ExitLoop(); },
729 IOEventHighPriority)) {
730 return false;
731 }
732 }
733 if (stdio_controls_profiling_) {
734 if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
735 return false;
736 }
737 }
738 if (jit_debug_reader_) {
739 auto callback = [this](std::vector<JITDebugInfo> debug_info, bool sync_kernel_records) {
740 return ProcessJITDebugInfo(std::move(debug_info), sync_kernel_records);
741 };
742 if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
743 return false;
744 }
745 if (!system_wide_collection_) {
746 std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
747 for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
748 pid_t pid;
749 if (GetProcessForThread(tid, &pid)) {
750 pids.insert(pid);
751 }
752 }
753 for (pid_t pid : pids) {
754 if (!jit_debug_reader_->MonitorProcess(pid)) {
755 return false;
756 }
757 }
758 if (!jit_debug_reader_->ReadAllProcesses()) {
759 return false;
760 }
761 }
762 }
763 if (event_selection_set_.HasAuxTrace()) {
764 // ETM events can only be enabled successfully after MmapEventFiles().
765 if (delay_in_ms_ == 0 && !event_selection_set_.IsEnabledOnExec()) {
766 if (!event_selection_set_.EnableETMEvents()) {
767 return false;
768 }
769 }
770 // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens
771 // either when all monitored threads are scheduled off cpu, or when all etm perf events are
772 // disabled.
773 // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This
774 // makes less than expected data, especially in system wide recording. So add a periodic event
775 // to flush etm data by temporarily disable all perf events.
776 auto etm_flush = [this]() {
777 return event_selection_set_.DisableETMEvents() && event_selection_set_.EnableETMEvents();
778 };
779 if (!loop->AddPeriodicEvent(SecondToTimeval(etm_flush_interval_.count() / 1000.0), etm_flush)) {
780 return false;
781 }
782
783 if (etm_branch_list_generator_) {
784 if (exclude_perf_) {
785 etm_branch_list_generator_->SetExcludePid(getpid());
786 }
787 if (binary_name_regex_) {
788 etm_branch_list_generator_->SetBinaryFilter(binary_name_regex_.get());
789 }
790 }
791 }
792 return true;
793 }
794
DoRecording(Workload * workload)795 bool RecordCommand::DoRecording(Workload* workload) {
796 // Write records in mapped buffers of perf_event_files to output file while workload is running.
797 if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
798 return false;
799 }
800 if (start_profiling_fd_.get() != -1) {
801 if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
802 PLOG(ERROR) << "failed to write to start_profiling_fd_";
803 }
804 start_profiling_fd_.reset();
805 }
806 if (stdio_controls_profiling_) {
807 printf("started\n");
808 fflush(stdout);
809 }
810 if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
811 return false;
812 }
813 time_stat_.stop_recording_time = GetSystemClock();
814 if (event_selection_set_.HasAuxTrace()) {
815 // Disable ETM events to flush the last ETM data.
816 if (!event_selection_set_.DisableETMEvents()) {
817 return false;
818 }
819 }
820 if (!event_selection_set_.SyncKernelBuffer()) {
821 return false;
822 }
823 event_selection_set_.CloseEventFiles();
824 time_stat_.finish_recording_time = GetSystemClock();
825 uint64_t recording_time = time_stat_.finish_recording_time - time_stat_.start_recording_time;
826 LOG(INFO) << "Recorded for " << recording_time / 1e9 << " seconds. Start post processing.";
827 return true;
828 }
829
WriteRecordDataToOutFd(const std::string & in_filename,android::base::unique_fd out_fd)830 static bool WriteRecordDataToOutFd(const std::string& in_filename,
831 android::base::unique_fd out_fd) {
832 android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
833 if (in_fd == -1) {
834 PLOG(ERROR) << "Failed to open " << in_filename;
835 return false;
836 }
837 char buf[8192];
838 while (true) {
839 ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
840 if (n < 0) {
841 PLOG(ERROR) << "Failed to read " << in_filename;
842 return false;
843 }
844 if (n == 0) {
845 break;
846 }
847 if (!android::base::WriteFully(out_fd, buf, n)) {
848 PLOG(ERROR) << "Failed to write to out_fd";
849 return false;
850 }
851 }
852 unlink(in_filename.c_str());
853 return true;
854 }
855
PostProcessRecording(const std::vector<std::string> & args)856 bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
857 // 1. Read records left in the buffer.
858 if (!event_selection_set_.FinishReadMmapEventData()) {
859 return false;
860 }
861
862 // 2. Post unwind dwarf callchain.
863 if (unwind_dwarf_callchain_ && post_unwind_) {
864 if (!PostUnwindRecords()) {
865 return false;
866 }
867 }
868
869 // 3. Optionally join Callchains.
870 if (callchain_joiner_) {
871 JoinCallChains();
872 }
873
874 // 4. Dump additional features, and close record file.
875 if (!record_file_writer_->FinishWritingDataSection()) {
876 return false;
877 }
878 if (!DumpAdditionalFeatures(args)) {
879 return false;
880 }
881 if (!record_file_writer_->Close()) {
882 return false;
883 }
884 if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
885 return false;
886 }
887 time_stat_.post_process_time = GetSystemClock();
888
889 // 5. Show brief record result.
890 auto report_compression_stat = [&]() {
891 if (auto compressor = record_file_writer_->GetCompressor(); compressor != nullptr) {
892 uint64_t original_size = compressor->TotalInputSize();
893 uint64_t compressed_size = compressor->TotalOutputSize();
894 LOG(INFO) << "Record compressed: " << ReadableBytes(compressed_size) << " (original "
895 << ReadableBytes(original_size) << ", ratio " << std::setprecision(2)
896 << (static_cast<double>(original_size) / compressed_size) << ")";
897 }
898 };
899
900 auto record_stat = event_selection_set_.GetRecordStat();
901 if (event_selection_set_.HasAuxTrace()) {
902 LOG(INFO) << "Aux data traced: " << ReadableCount(record_stat.aux_data_size);
903 if (record_stat.lost_aux_data_size != 0) {
904 LOG(INFO) << "Aux data lost in user space: " << ReadableCount(record_stat.lost_aux_data_size)
905 << ", consider increasing userspace buffer size(--user-buffer-size).";
906 }
907 report_compression_stat();
908 } else {
909 // Here we report all lost records as samples. This isn't accurate. Because records like
910 // MmapRecords are not samples. But It's easier for users to understand.
911 size_t userspace_lost_samples =
912 record_stat.userspace_lost_samples + record_stat.userspace_lost_non_samples;
913 size_t lost_samples = record_stat.kernelspace_lost_records + userspace_lost_samples;
914
915 std::stringstream os;
916 os << "Samples recorded: " << ReadableCount(sample_record_count_);
917 if (record_stat.userspace_truncated_stack_samples > 0) {
918 os << " (" << ReadableCount(record_stat.userspace_truncated_stack_samples)
919 << " with truncated stacks)";
920 }
921 os << ". Samples lost: " << ReadableCount(lost_samples);
922 if (lost_samples != 0) {
923 os << " (kernelspace: " << ReadableCount(record_stat.kernelspace_lost_records)
924 << ", userspace: " << ReadableCount(userspace_lost_samples) << ")";
925 }
926 os << ".";
927 LOG(INFO) << os.str();
928 report_compression_stat();
929
930 LOG(DEBUG) << "Record stat: kernelspace_lost_records="
931 << ReadableCount(record_stat.kernelspace_lost_records)
932 << ", userspace_lost_samples=" << ReadableCount(record_stat.userspace_lost_samples)
933 << ", userspace_lost_non_samples="
934 << ReadableCount(record_stat.userspace_lost_non_samples)
935 << ", userspace_truncated_stack_samples="
936 << ReadableCount(record_stat.userspace_truncated_stack_samples);
937
938 if (sample_record_count_ + record_stat.kernelspace_lost_records != 0) {
939 double kernelspace_lost_percent =
940 static_cast<double>(record_stat.kernelspace_lost_records) /
941 (record_stat.kernelspace_lost_records + sample_record_count_);
942 constexpr double KERNELSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
943 if (kernelspace_lost_percent >= KERNELSPACE_LOST_PERCENT_WARNING_BAR) {
944 LOG(WARNING) << "Lost " << (kernelspace_lost_percent * 100)
945 << "% of samples in kernel space, "
946 << "consider increasing kernel buffer size(-m), "
947 << "or decreasing sample frequency(-f), "
948 << "or increasing sample period(-c).";
949 }
950 }
951 size_t userspace_lost_truncated_samples =
952 userspace_lost_samples + record_stat.userspace_truncated_stack_samples;
953 size_t userspace_complete_samples =
954 sample_record_count_ - record_stat.userspace_truncated_stack_samples;
955 if (userspace_complete_samples + userspace_lost_truncated_samples != 0) {
956 double userspace_lost_percent =
957 static_cast<double>(userspace_lost_truncated_samples) /
958 (userspace_complete_samples + userspace_lost_truncated_samples);
959 constexpr double USERSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
960 if (userspace_lost_percent >= USERSPACE_LOST_PERCENT_WARNING_BAR) {
961 LOG(WARNING) << "Lost/Truncated " << (userspace_lost_percent * 100)
962 << "% of samples in user space, "
963 << "consider increasing userspace buffer size(--user-buffer-size), "
964 << "or decreasing sample frequency(-f), "
965 << "or increasing sample period(-c).";
966 }
967 }
968 if (callchain_joiner_) {
969 callchain_joiner_->DumpStat();
970 }
971 }
972 LOG(DEBUG) << "Prepare recording time "
973 << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e9
974 << " s, recording time "
975 << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e9
976 << " s, stop recording time "
977 << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e9
978 << " s, post process time "
979 << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e9 << " s.";
980 return true;
981 }
982
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args,ProbeEvents & probe_events)983 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
984 std::vector<std::string>* non_option_args,
985 ProbeEvents& probe_events) {
986 OptionValueMap options;
987 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
988
989 if (!PreprocessOptions(args, GetRecordCmdOptionFormats(), &options, &ordered_options,
990 non_option_args)) {
991 return false;
992 }
993
994 // Process options.
995 system_wide_collection_ = options.PullBoolValue("-a");
996
997 if (auto value = options.PullValue("--add-counter"); value) {
998 add_counters_ = android::base::Split(value->str_value, ",");
999 }
1000
1001 for (const OptionValue& value : options.PullValues("--add-meta-info")) {
1002 const std::string& s = value.str_value;
1003 auto split_pos = s.find('=');
1004 if (split_pos == std::string::npos || split_pos == 0 || split_pos + 1 == s.size()) {
1005 LOG(ERROR) << "invalid meta-info: " << s;
1006 return false;
1007 }
1008 extra_meta_info_[s.substr(0, split_pos)] = s.substr(split_pos + 1);
1009 }
1010
1011 if (auto value = options.PullValue("--addr-filter"); value) {
1012 auto filters = ParseAddrFilterOption(value->str_value);
1013 if (filters.empty()) {
1014 return false;
1015 }
1016 event_selection_set_.SetAddrFilters(std::move(filters));
1017 }
1018
1019 if (auto value = options.PullValue("--app"); value) {
1020 app_package_name_ = value->str_value;
1021 }
1022
1023 if (auto value = options.PullValue("--aux-buffer-size"); value) {
1024 uint64_t v = value->uint_value;
1025 if (v > std::numeric_limits<size_t>::max() || !IsPowerOfTwo(v) || v % sysconf(_SC_PAGE_SIZE)) {
1026 LOG(ERROR) << "invalid aux buffer size: " << v;
1027 return false;
1028 }
1029 aux_buffer_size_ = static_cast<size_t>(v);
1030 }
1031
1032 if (options.PullValue("-b")) {
1033 branch_sampling_ = branch_sampling_type_map["any"];
1034 }
1035
1036 if (auto value = options.PullValue("--binary"); value) {
1037 binary_name_regex_ = RegEx::Create(value->str_value);
1038 if (binary_name_regex_ == nullptr) {
1039 return false;
1040 }
1041 }
1042
1043 if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
1044 &callchain_joiner_min_matching_nodes_, 1)) {
1045 return false;
1046 }
1047
1048 if (auto value = options.PullValue("--clockid"); value) {
1049 clockid_ = value->str_value;
1050 if (clockid_ != "perf") {
1051 if (!IsSettingClockIdSupported()) {
1052 LOG(ERROR) << "Setting clockid is not supported by the kernel.";
1053 return false;
1054 }
1055 if (clockid_map.find(clockid_) == clockid_map.end()) {
1056 LOG(ERROR) << "Invalid clockid: " << clockid_;
1057 return false;
1058 }
1059 }
1060 }
1061
1062 if (!options.PullUintValue("--cpu-percent", &cpu_time_max_percent_, 1, 100)) {
1063 return false;
1064 }
1065
1066 if (options.PullBoolValue("--decode-etm")) {
1067 etm_branch_list_generator_ = ETMBranchListGenerator::Create(system_wide_collection_);
1068 }
1069 uint32_t interval = 0;
1070 if (options.PullUintValue("--etm-flush-interval", &interval) && interval != 0) {
1071 etm_flush_interval_ = std::chrono::milliseconds(interval);
1072 }
1073
1074 if (options.PullBoolValue("--record-timestamp")) {
1075 ETMRecorder& recorder = ETMRecorder::GetInstance();
1076 recorder.SetRecordTimestamp(true);
1077 }
1078
1079 if (options.PullBoolValue("--record-cycles")) {
1080 ETMRecorder& recorder = ETMRecorder::GetInstance();
1081 recorder.SetRecordCycles(true);
1082 }
1083
1084 if (!options.PullUintValue("--delay", &delay_in_ms_)) {
1085 return false;
1086 }
1087
1088 size_t cyc_threshold;
1089 if (options.PullUintValue("--cycle-threshold", &cyc_threshold)) {
1090 ETMRecorder& recorder = ETMRecorder::GetInstance();
1091 recorder.SetCycleThreshold(cyc_threshold);
1092 }
1093
1094 if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
1095 return false;
1096 }
1097
1098 exclude_perf_ = options.PullBoolValue("--exclude-perf");
1099 if (!record_filter_.ParseOptions(options)) {
1100 return false;
1101 }
1102
1103 if (options.PullValue("--exit-with-parent")) {
1104 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
1105 }
1106
1107 in_app_context_ = options.PullBoolValue("--in-app");
1108
1109 for (const OptionValue& value : options.PullValues("-j")) {
1110 std::vector<std::string> branch_sampling_types = android::base::Split(value.str_value, ",");
1111 for (auto& type : branch_sampling_types) {
1112 auto it = branch_sampling_type_map.find(type);
1113 if (it == branch_sampling_type_map.end()) {
1114 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
1115 return false;
1116 }
1117 branch_sampling_ |= it->second;
1118 }
1119 }
1120 keep_failed_unwinding_result_ = options.PullBoolValue("--keep-failed-unwinding-result");
1121 keep_failed_unwinding_debug_info_ = options.PullBoolValue("--keep-failed-unwinding-debug-info");
1122 if (keep_failed_unwinding_debug_info_) {
1123 keep_failed_unwinding_result_ = true;
1124 }
1125
1126 for (const OptionValue& value : options.PullValues("--kprobe")) {
1127 std::vector<std::string> cmds = android::base::Split(value.str_value, ",");
1128 for (const auto& cmd : cmds) {
1129 if (!probe_events.AddKprobe(cmd)) {
1130 return false;
1131 }
1132 }
1133 }
1134
1135 if (auto value = options.PullValue("-m"); value) {
1136 if (!IsPowerOfTwo(value->uint_value) ||
1137 value->uint_value > std::numeric_limits<size_t>::max()) {
1138 LOG(ERROR) << "Invalid mmap_pages: '" << value->uint_value << "'";
1139 return false;
1140 }
1141 mmap_page_range_.first = mmap_page_range_.second = value->uint_value;
1142 }
1143
1144 allow_callchain_joiner_ = !options.PullBoolValue("--no-callchain-joiner");
1145 allow_truncating_samples_ = !options.PullBoolValue("--no-cut-samples");
1146 dump_build_id_ = !options.PullBoolValue("--no-dump-build-id");
1147 can_dump_kernel_symbols_ = !options.PullBoolValue("--no-dump-kernel-symbols");
1148 dump_symbols_ = !options.PullBoolValue("--no-dump-symbols");
1149 if (auto value = options.PullValue("--no-inherit"); value) {
1150 child_inherit_ = false;
1151 } else if (system_wide_collection_) {
1152 // child_inherit is used to monitor newly created threads. It isn't useful in system wide
1153 // collection, which monitors all threads running on selected cpus.
1154 child_inherit_ = false;
1155 }
1156 unwind_dwarf_callchain_ = !options.PullBoolValue("--no-unwind");
1157
1158 if (auto value = options.PullValue("-o"); value) {
1159 record_filename_ = value->str_value;
1160 }
1161
1162 if (auto value = options.PullValue("--out-fd"); value) {
1163 out_fd_.reset(static_cast<int>(value->uint_value));
1164 }
1165
1166 if (auto strs = options.PullStringValues("-p"); !strs.empty()) {
1167 if (auto pids = GetPidsFromStrings(strs, true, true); pids) {
1168 event_selection_set_.AddMonitoredProcesses(pids.value());
1169 } else {
1170 return false;
1171 }
1172 }
1173
1174 // Use explicit if statements instead of logical operators to avoid short-circuit.
1175 if (options.PullValue("--post-unwind")) {
1176 post_unwind_ = true;
1177 }
1178 if (options.PullValue("--post-unwind=yes")) {
1179 post_unwind_ = true;
1180 }
1181 if (options.PullValue("--post-unwind=no")) {
1182 post_unwind_ = false;
1183 }
1184
1185 if (auto value = options.PullValue("--user-buffer-size"); value) {
1186 uint64_t v = value->uint_value;
1187 if (v > std::numeric_limits<size_t>::max() || v == 0) {
1188 LOG(ERROR) << "invalid user buffer size: " << v;
1189 return false;
1190 }
1191 user_buffer_size_ = static_cast<size_t>(v);
1192 }
1193
1194 if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
1195 return false;
1196 }
1197
1198 if (auto value = options.PullValue("--start_profiling_fd"); value) {
1199 start_profiling_fd_.reset(static_cast<int>(value->uint_value));
1200 }
1201
1202 stdio_controls_profiling_ = options.PullBoolValue("--stdio-controls-profiling");
1203
1204 if (auto value = options.PullValue("--stop-signal-fd"); value) {
1205 stop_signal_fd_.reset(static_cast<int>(value->uint_value));
1206 }
1207
1208 if (auto value = options.PullValue("--symfs"); value) {
1209 if (!Dso::SetSymFsDir(value->str_value)) {
1210 return false;
1211 }
1212 }
1213
1214 for (const OptionValue& value : options.PullValues("-t")) {
1215 if (auto tids = GetTidsFromString(value.str_value, true); tids) {
1216 event_selection_set_.AddMonitoredThreads(tids.value());
1217 } else {
1218 return false;
1219 }
1220 }
1221
1222 trace_offcpu_ = options.PullBoolValue("--trace-offcpu");
1223
1224 if (auto value = options.PullValue("--tracepoint-events"); value) {
1225 if (!EventTypeManager::Instance().ReadTracepointsFromFile(value->str_value)) {
1226 return false;
1227 }
1228 }
1229 use_cmd_exit_code_ = options.PullBoolValue("--use-cmd-exit-code");
1230
1231 if (auto value = options.PullValue("-z"); value) {
1232 if (value->str_value.empty()) {
1233 // 3 is the default compression level of zstd library, in ZSTD_defaultCLevel().
1234 constexpr size_t DEFAULT_COMPRESSION_LEVEL = 3;
1235 compression_level_ = DEFAULT_COMPRESSION_LEVEL;
1236 } else {
1237 if (!android::base::ParseUint(value->str_value, &compression_level_) ||
1238 compression_level_ < 1 || compression_level_ > 22) {
1239 LOG(ERROR) << "invalid compression level for -z: " << value->str_value;
1240 return false;
1241 }
1242 }
1243 }
1244
1245 CHECK(options.values.empty());
1246
1247 // Process ordered options.
1248 for (const auto& pair : ordered_options) {
1249 const OptionName& name = pair.first;
1250 const OptionValue& value = pair.second;
1251
1252 if (name == "-c" || name == "-f") {
1253 if (value.uint_value < 1) {
1254 LOG(ERROR) << "invalid " << name << ": " << value.uint_value;
1255 return false;
1256 }
1257 SampleRate rate;
1258 if (name == "-c") {
1259 rate.sample_period = value.uint_value;
1260 } else {
1261 if (value.uint_value >= INT_MAX) {
1262 LOG(ERROR) << "sample freq can't be bigger than INT_MAX: " << value.uint_value;
1263 return false;
1264 }
1265 rate.sample_freq = value.uint_value;
1266 }
1267 event_selection_set_.SetSampleRateForNewEvents(rate);
1268
1269 } else if (name == "--call-graph") {
1270 std::vector<std::string> strs = android::base::Split(value.str_value, ",");
1271 if (strs[0] == "fp") {
1272 fp_callchain_sampling_ = true;
1273 dwarf_callchain_sampling_ = false;
1274 } else if (strs[0] == "dwarf") {
1275 fp_callchain_sampling_ = false;
1276 dwarf_callchain_sampling_ = true;
1277 if (strs.size() > 1) {
1278 uint64_t size;
1279 if (!ParseUint(strs[1], &size)) {
1280 LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
1281 return false;
1282 }
1283 if ((size & 7) != 0) {
1284 LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
1285 return false;
1286 }
1287 if (size >= MAX_DUMP_STACK_SIZE) {
1288 LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size "
1289 << MAX_DUMP_STACK_SIZE << ".";
1290 return false;
1291 }
1292 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
1293 }
1294 }
1295
1296 } else if (name == "--cpu") {
1297 if (auto cpus = GetCpusFromString(value.str_value); cpus) {
1298 event_selection_set_.SetCpusForNewEvents(
1299 std::vector<int>(cpus.value().begin(), cpus.value().end()));
1300 } else {
1301 return false;
1302 }
1303 } else if (name == "-e") {
1304 std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
1305 for (auto& event_type : event_types) {
1306 if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
1307 return false;
1308 }
1309 if (!event_selection_set_.AddEventType(event_type)) {
1310 return false;
1311 }
1312 }
1313 } else if (name == "-g") {
1314 fp_callchain_sampling_ = false;
1315 dwarf_callchain_sampling_ = true;
1316 } else if (name == "--group") {
1317 std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
1318 for (const auto& event_type : event_types) {
1319 if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
1320 return false;
1321 }
1322 }
1323 if (!event_selection_set_.AddEventGroup(event_types)) {
1324 return false;
1325 }
1326 } else if (name == "--tp-filter") {
1327 if (!event_selection_set_.SetTracepointFilter(value.str_value)) {
1328 return false;
1329 }
1330 } else {
1331 LOG(ERROR) << "unprocessed option: " << name;
1332 return false;
1333 }
1334 }
1335
1336 if (!dwarf_callchain_sampling_) {
1337 if (!unwind_dwarf_callchain_) {
1338 LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option.";
1339 return false;
1340 }
1341 unwind_dwarf_callchain_ = false;
1342 }
1343 if (post_unwind_) {
1344 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
1345 post_unwind_ = false;
1346 }
1347 }
1348
1349 if (fp_callchain_sampling_) {
1350 if (GetTargetArch() == ARCH_ARM) {
1351 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
1352 << "consider using `-g` option or profiling on aarch64 architecture.";
1353 }
1354 }
1355
1356 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
1357 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
1358 "used at the same time.";
1359 return false;
1360 }
1361
1362 if (system_wide_collection_ && !IsRoot()) {
1363 LOG(ERROR) << "System wide profiling needs root privilege.";
1364 return false;
1365 }
1366
1367 if (dump_symbols_ && can_dump_kernel_symbols_) {
1368 // No need to dump kernel symbols as we will dump all required symbols.
1369 can_dump_kernel_symbols_ = false;
1370 }
1371 if (clockid_.empty()) {
1372 clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
1373 }
1374 return true;
1375 }
1376
AdjustPerfEventLimit()1377 bool RecordCommand::AdjustPerfEventLimit() {
1378 bool set_prop = false;
1379 // 1. Adjust max_sample_rate.
1380 uint64_t cur_max_freq;
1381 if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
1382 !SetMaxSampleFrequency(max_sample_freq_)) {
1383 set_prop = true;
1384 }
1385 // 2. Adjust perf_cpu_time_max_percent.
1386 size_t cur_percent;
1387 if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
1388 !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
1389 set_prop = true;
1390 }
1391 // 3. Adjust perf_event_mlock_kb.
1392 long cpus = sysconf(_SC_NPROCESSORS_CONF);
1393 uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
1394 if (event_selection_set_.HasAuxTrace()) {
1395 mlock_kb += cpus * aux_buffer_size_ / 1024;
1396 }
1397 uint64_t cur_mlock_kb;
1398 if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
1399 !SetPerfEventMlockKb(mlock_kb)) {
1400 set_prop = true;
1401 }
1402
1403 if (GetAndroidVersion() >= kAndroidVersionQ && set_prop && !in_app_context_) {
1404 return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
1405 std::max(mlock_kb, cur_mlock_kb));
1406 }
1407 return true;
1408 }
1409
TraceOffCpu()1410 bool RecordCommand::TraceOffCpu() {
1411 if (FindEventTypeByName("sched:sched_switch") == nullptr) {
1412 LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
1413 return false;
1414 }
1415 for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
1416 if (event_type->name == "sched:sched_switch") {
1417 LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
1418 return false;
1419 }
1420 }
1421 if (!IsDumpingRegsForTracepointEventsSupported()) {
1422 LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
1423 return false;
1424 }
1425 // --trace-offcpu option only works with one of the selected event types.
1426 std::set<std::string> accepted_events = {"cpu-clock", "task-clock"};
1427 std::vector<const EventType*> events = event_selection_set_.GetEvents();
1428 if (events.size() != 1 || accepted_events.find(events[0]->name) == accepted_events.end()) {
1429 LOG(ERROR) << "--trace-offcpu option only works with one of events "
1430 << android::base::Join(accepted_events, ' ');
1431 return false;
1432 }
1433 if (!event_selection_set_.AddEventType("sched:sched_switch", SampleRate(0, 1))) {
1434 return false;
1435 }
1436 if (IsSwitchRecordSupported()) {
1437 event_selection_set_.EnableSwitchRecord();
1438 }
1439 return true;
1440 }
1441
SetEventSelectionFlags()1442 bool RecordCommand::SetEventSelectionFlags() {
1443 event_selection_set_.SampleIdAll();
1444 if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
1445 return false;
1446 }
1447 if (fp_callchain_sampling_) {
1448 event_selection_set_.EnableFpCallChainSampling();
1449 } else if (dwarf_callchain_sampling_) {
1450 if (!event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_)) {
1451 return false;
1452 }
1453 }
1454 event_selection_set_.SetInherit(child_inherit_);
1455 if (clockid_ != "perf") {
1456 event_selection_set_.SetClockId(clockid_map[clockid_]);
1457 }
1458 return true;
1459 }
1460
CreateAndInitRecordFile()1461 bool RecordCommand::CreateAndInitRecordFile() {
1462 EventAttrIds attrs = event_selection_set_.GetEventAttrWithId();
1463 bool remove_regs_and_stacks = unwind_dwarf_callchain_ && !post_unwind_;
1464 if (remove_regs_and_stacks) {
1465 for (auto& attr : attrs) {
1466 ReplaceRegAndStackWithCallChain(attr.attr);
1467 }
1468 }
1469 record_file_writer_ = CreateRecordFile(record_filename_, attrs);
1470 if (record_file_writer_ == nullptr) {
1471 return false;
1472 }
1473 // Use first perf_event_attr and first event id to dump mmap and comm records.
1474 CHECK(!attrs.empty());
1475 dumping_attr_id_ = attrs[0];
1476 CHECK(!dumping_attr_id_.ids.empty());
1477 map_record_reader_.emplace(dumping_attr_id_.attr, dumping_attr_id_.ids[0],
1478 event_selection_set_.RecordNotExecutableMaps());
1479 map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });
1480
1481 return DumpKernelSymbol() && DumpTracingData() && DumpMaps() && DumpAuxTraceInfo();
1482 }
1483
CreateRecordFile(const std::string & filename,const EventAttrIds & attrs)1484 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename,
1485 const EventAttrIds& attrs) {
1486 std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
1487 if (!writer) {
1488 return nullptr;
1489 }
1490 if (compression_level_ != 0 && !writer->SetCompressionLevel(compression_level_)) {
1491 return nullptr;
1492 }
1493 if (!writer->WriteAttrSection(attrs)) {
1494 return nullptr;
1495 }
1496 return writer;
1497 }
1498
DumpKernelSymbol()1499 bool RecordCommand::DumpKernelSymbol() {
1500 if (can_dump_kernel_symbols_) {
1501 if (event_selection_set_.NeedKernelSymbol()) {
1502 std::string kallsyms;
1503 if (!LoadKernelSymbols(&kallsyms)) {
1504 // Symbol loading may have failed due to the lack of permissions. This
1505 // is not fatal, the symbols will appear as "unknown".
1506 return true;
1507 }
1508 KernelSymbolRecord r(kallsyms);
1509 if (!ProcessRecord(&r)) {
1510 return false;
1511 }
1512 }
1513 }
1514 return true;
1515 }
1516
DumpTracingData()1517 bool RecordCommand::DumpTracingData() {
1518 std::vector<const EventType*> tracepoint_event_types = event_selection_set_.GetTracepointEvents();
1519 if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
1520 return true; // No need to dump tracing data, or can't do it.
1521 }
1522 std::vector<char> tracing_data;
1523 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
1524 return false;
1525 }
1526 TracingDataRecord record(tracing_data);
1527 if (!ProcessRecord(&record)) {
1528 return false;
1529 }
1530 return true;
1531 }
1532
DumpMaps()1533 bool RecordCommand::DumpMaps() {
1534 if (system_wide_collection_) {
1535 // For system wide recording:
1536 // If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the
1537 // first time a sample hits that process).
1538 // If aux tracing with decoding etm data, the maps are dumped by etm_branch_list_generator.
1539 // If aux tracing without decoding etm data, we don't know which maps will be needed, so dump
1540 // all process maps. To reduce pre recording time, we dump process maps in map record thread
1541 // while recording.
1542 if (event_selection_set_.HasAuxTrace() && !etm_branch_list_generator_) {
1543 map_record_thread_.emplace(*map_record_reader_);
1544 return true;
1545 }
1546 if (!event_selection_set_.ExcludeKernel()) {
1547 return map_record_reader_->ReadKernelMaps();
1548 }
1549 return true;
1550 }
1551 if (!event_selection_set_.ExcludeKernel() && !map_record_reader_->ReadKernelMaps()) {
1552 return false;
1553 }
1554 // Map from process id to a set of thread ids in that process.
1555 std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
1556 for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
1557 std::vector<pid_t> tids = GetThreadsInProcess(pid);
1558 process_map[pid].insert(tids.begin(), tids.end());
1559 }
1560 for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
1561 pid_t pid;
1562 if (GetProcessForThread(tid, &pid)) {
1563 process_map[pid].insert(tid);
1564 }
1565 }
1566
1567 // Dump each process.
1568 for (const auto& [pid, tids] : process_map) {
1569 if (!map_record_reader_->ReadProcessMaps(pid, tids, 0)) {
1570 return false;
1571 }
1572 }
1573 return true;
1574 }
1575
ProcessRecord(Record * record)1576 bool RecordCommand::ProcessRecord(Record* record) {
1577 UpdateRecord(record);
1578 if (ShouldOmitRecord(record)) {
1579 return true;
1580 }
1581 if (size_limit_in_bytes_ > 0u) {
1582 if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
1583 return event_selection_set_.GetIOEventLoop()->ExitLoop();
1584 }
1585 }
1586 if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
1587 return false;
1588 }
1589 last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
1590 // In system wide recording, maps are dumped when they are needed by records.
1591 if (system_wide_collection_ && !DumpMapsForRecord(record)) {
1592 return false;
1593 }
1594 // Record filter check should go after DumpMapsForRecord(). Otherwise, process/thread name
1595 // filters don't work in system wide collection.
1596 if (record->type() == PERF_RECORD_SAMPLE) {
1597 if (!record_filter_.Check(static_cast<SampleRecord&>(*record))) {
1598 return true;
1599 }
1600 }
1601 if (etm_branch_list_generator_) {
1602 bool consumed = false;
1603 if (!etm_branch_list_generator_->ProcessRecord(*record, consumed)) {
1604 return false;
1605 }
1606 if (consumed) {
1607 return true;
1608 }
1609 }
1610 if (unwind_dwarf_callchain_) {
1611 if (post_unwind_) {
1612 return SaveRecordForPostUnwinding(record);
1613 }
1614 return SaveRecordAfterUnwinding(record);
1615 }
1616 return SaveRecordWithoutUnwinding(record);
1617 }
1618
DumpAuxTraceInfo()1619 bool RecordCommand::DumpAuxTraceInfo() {
1620 if (event_selection_set_.HasAuxTrace()) {
1621 AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
1622 return ProcessRecord(&auxtrace_info);
1623 }
1624 return true;
1625 }
1626
// Returns true for a user space map record whose mapped file only exists in memory, as decided
// by MappedFileOnlyExistInMemory(). Kernel maps always yield false.
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  if (record->InKernel()) {
    return false;
  }
  return MappedFileOnlyExistInMemory(record->filename);
}
1631
ShouldOmitRecord(Record * record)1632 bool RecordCommand::ShouldOmitRecord(Record* record) {
1633 if (jit_debug_reader_) {
1634 // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
1635 // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
1636 // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
1637 // entries for unwinding, as in http://b/77236599. So it is better to remove
1638 // dalvik-jit-code-cache and other maps that only exist in memory.
1639 switch (record->type()) {
1640 case PERF_RECORD_MMAP:
1641 return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
1642 case PERF_RECORD_MMAP2:
1643 return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
1644 }
1645 }
1646 return false;
1647 }
1648
DumpMapsForRecord(Record * record)1649 bool RecordCommand::DumpMapsForRecord(Record* record) {
1650 if (record->type() == PERF_RECORD_SAMPLE) {
1651 pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
1652 if (dumped_processes_.find(pid) == dumped_processes_.end()) {
1653 // Dump map info and all thread names for that process.
1654 if (!map_record_reader_->ReadProcessMaps(pid, last_record_timestamp_)) {
1655 return false;
1656 }
1657 dumped_processes_.insert(pid);
1658 }
1659 }
1660 return true;
1661 }
1662
SaveRecordForPostUnwinding(Record * record)1663 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
1664 if (!record_file_writer_->WriteRecord(*record)) {
1665 LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
1666 << "--no-post-unwind option.";
1667 return false;
1668 }
1669 return true;
1670 }
1671
SaveRecordAfterUnwinding(Record * record)1672 bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
1673 if (record->type() == PERF_RECORD_SAMPLE) {
1674 auto& r = *static_cast<SampleRecord*>(record);
1675 // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
1676 // to adjust callchains generated by dwarf unwinder.
1677 r.AdjustCallChainGeneratedByKernel();
1678 if (!UnwindRecord(r)) {
1679 return false;
1680 }
1681 // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
1682 // chain.
1683 if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1684 // If current record contains no user callchain, skip it.
1685 return true;
1686 }
1687 sample_record_count_++;
1688 } else {
1689 thread_tree_.Update(*record);
1690 }
1691 return record_file_writer_->WriteRecord(*record);
1692 }
1693
SaveRecordWithoutUnwinding(Record * record)1694 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
1695 if (record->type() == PERF_RECORD_SAMPLE) {
1696 auto& r = *static_cast<SampleRecord*>(record);
1697 if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
1698 r.AdjustCallChainGeneratedByKernel();
1699 }
1700 if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1701 // If current record contains no user callchain, skip it.
1702 return true;
1703 }
1704 sample_record_count_++;
1705 }
1706 return record_file_writer_->WriteRecord(*record);
1707 }
1708
ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info,bool sync_kernel_records)1709 bool RecordCommand::ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info,
1710 bool sync_kernel_records) {
1711 for (auto& info : debug_info) {
1712 if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
1713 uint64_t timestamp =
1714 jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
1715 Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, info.jit_code_addr,
1716 info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
1717 info.file_path, dumping_attr_id_.ids[0], timestamp);
1718 if (!ProcessRecord(&record)) {
1719 return false;
1720 }
1721 } else {
1722 if (!info.symbols.empty()) {
1723 Dso* dso = thread_tree_.FindUserDsoOrNew(info.file_path, 0, DSO_DEX_FILE);
1724 dso->SetSymbols(&info.symbols);
1725 }
1726 if (info.dex_file_map) {
1727 ThreadMmap& map = *info.dex_file_map;
1728 uint64_t timestamp =
1729 jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
1730 Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, map.start_addr,
1731 map.len, map.pgoff, map.prot, map.name, dumping_attr_id_.ids[0],
1732 timestamp);
1733 if (!ProcessRecord(&record)) {
1734 return false;
1735 }
1736 }
1737 thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
1738 }
1739 }
1740 // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
1741 // generated after them. So process existing samples each time generating new JIT maps. We prefer
1742 // to process samples after processing JIT maps. Because some of the samples may hit the new JIT
1743 // maps, and we want to report them properly.
1744 if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
1745 return false;
1746 }
1747 return true;
1748 }
1749
ProcessControlCmd(IOEventLoop * loop)1750 bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
1751 char* line = nullptr;
1752 size_t line_length = 0;
1753 if (getline(&line, &line_length, stdin) == -1) {
1754 free(line);
1755 // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
1756 // So we may see EOF of stdin.
1757 return loop->ExitLoop();
1758 }
1759 std::string cmd = android::base::Trim(line);
1760 free(line);
1761 LOG(DEBUG) << "process control cmd: " << cmd;
1762 bool result = false;
1763 if (cmd == "pause") {
1764 result = event_selection_set_.SetEnableEvents(false);
1765 } else if (cmd == "resume") {
1766 result = event_selection_set_.SetEnableEvents(true);
1767 } else {
1768 LOG(ERROR) << "unknown control cmd: " << cmd;
1769 }
1770 printf("%s\n", result ? "ok" : "error");
1771 fflush(stdout);
1772 return result;
1773 }
1774
1775 template <class RecordType>
UpdateMmapRecordForEmbeddedPath(RecordType & r,bool has_prot,uint32_t prot)1776 void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
1777 if (r.InKernel()) {
1778 return;
1779 }
1780 std::string filename = r.filename;
1781 bool name_changed = false;
1782 // Some vdex files in map files are marked with deleted flag, but they exist in the file
1783 // system.
1784 // It may be because a new file is used to replace the old one, but still worth to try.
1785 if (android::base::EndsWith(filename, " (deleted)")) {
1786 filename.resize(filename.size() - 10);
1787 name_changed = true;
1788 }
1789 if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
1790 // For the case of a shared library "foobar.so" embedded
1791 // inside an APK, we rewrite the original MMAP from
1792 // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
1793 // so as to make the library name explicit. This update is
1794 // done here (as part of the record operation) as opposed to
1795 // on the host during the report, since we want to report
1796 // the correct library name even if the the APK in question
1797 // is not present on the host. The new offset W is
1798 // calculated to be with respect to the start of foobar.so,
1799 // not to the start of path.apk.
1800 EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
1801 if (ee != nullptr) {
1802 // Compute new offset relative to start of elf in APK.
1803 auto data = *r.data;
1804 data.pgoff -= ee->entry_offset();
1805 r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
1806 return;
1807 }
1808 }
1809 std::string zip_path;
1810 std::string entry_name;
1811 if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
1812 filename = GetUrlInApk(zip_path, entry_name);
1813 name_changed = true;
1814 }
1815 if (name_changed) {
1816 auto data = *r.data;
1817 r.SetDataAndFilename(data, filename);
1818 }
1819 }
1820
UpdateRecord(Record * record)1821 void RecordCommand::UpdateRecord(Record* record) {
1822 if (record->type() == PERF_RECORD_MMAP) {
1823 UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
1824 } else if (record->type() == PERF_RECORD_MMAP2) {
1825 auto r = static_cast<Mmap2Record*>(record);
1826 UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
1827 } else if (record->type() == PERF_RECORD_COMM) {
1828 auto r = static_cast<CommRecord*>(record);
1829 if (r->data->pid == r->data->tid) {
1830 std::string s = GetCompleteProcessName(r->data->pid);
1831 if (!s.empty()) {
1832 r->SetCommandName(s);
1833 }
1834 }
1835 }
1836 }
1837
UnwindRecord(SampleRecord & r)1838 bool RecordCommand::UnwindRecord(SampleRecord& r) {
1839 if (!(r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
1840 (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER)) {
1841 return true;
1842 }
1843 if (r.GetValidStackSize() > 0) {
1844 ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
1845 RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
1846 std::vector<uint64_t> ips;
1847 std::vector<uint64_t> sps;
1848 if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
1849 r.GetValidStackSize(), &ips, &sps)) {
1850 return false;
1851 }
1852 // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info
1853 // from the process and retry unwinding.
1854 if (jit_debug_reader_ && !post_unwind_ &&
1855 offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
1856 jit_debug_reader_->ReadProcess(r.tid_data.pid);
1857 jit_debug_reader_->FlushDebugInfo(r.Timestamp());
1858 if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
1859 r.GetValidStackSize(), &ips, &sps)) {
1860 return false;
1861 }
1862 }
1863 if (keep_failed_unwinding_result_ && !KeepFailedUnwindingResult(r, ips, sps)) {
1864 return false;
1865 }
1866 r.ReplaceRegAndStackWithCallChain(ips);
1867 if (callchain_joiner_ &&
1868 !callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
1869 CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
1870 return false;
1871 }
1872 } else {
1873 // For kernel samples, we still need to remove user stack and register fields.
1874 r.ReplaceRegAndStackWithCallChain({});
1875 }
1876 return true;
1877 }
1878
KeepFailedUnwindingResult(const SampleRecord & r,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps)1879 bool RecordCommand::KeepFailedUnwindingResult(const SampleRecord& r,
1880 const std::vector<uint64_t>& ips,
1881 const std::vector<uint64_t>& sps) {
1882 auto& result = offline_unwinder_->GetUnwindingResult();
1883 if (result.error_code != unwindstack::ERROR_NONE) {
1884 if (keep_failed_unwinding_debug_info_) {
1885 return record_file_writer_->WriteRecord(UnwindingResultRecord(
1886 r.time_data.time, result, r.regs_user_data, r.stack_user_data, ips, sps));
1887 }
1888 return record_file_writer_->WriteRecord(
1889 UnwindingResultRecord(r.time_data.time, result, {}, {}, {}, {}));
1890 }
1891 return true;
1892 }
1893
MoveRecordFile(const std::string & old_filename)1894 std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::string& old_filename) {
1895 if (!record_file_writer_->FinishWritingDataSection() || !record_file_writer_->Close()) {
1896 return nullptr;
1897 }
1898 record_file_writer_.reset();
1899 std::error_code ec;
1900 std::filesystem::rename(record_filename_, old_filename, ec);
1901 if (ec) {
1902 LOG(DEBUG) << "Failed to rename: " << ec.message();
1903 // rename() fails on Android N x86 emulator, which uses kernel 3.10. Because rename() in bionic
1904 // uses renameat2 syscall, which isn't support on kernel < 3.15. So add a fallback to mv
1905 // command. The mv command can also work with other situations when rename() doesn't work.
1906 // So we'd like to keep it as a fallback to rename().
1907 if (!Workload::RunCmd({"mv", record_filename_, old_filename})) {
1908 return nullptr;
1909 }
1910 }
1911
1912 auto reader = RecordFileReader::CreateInstance(old_filename);
1913 if (!reader) {
1914 return nullptr;
1915 }
1916
1917 record_file_writer_ = CreateRecordFile(record_filename_, reader->AttrSection());
1918 if (!record_file_writer_) {
1919 return nullptr;
1920 }
1921 return reader;
1922 }
1923
PostUnwindRecords()1924 bool RecordCommand::PostUnwindRecords() {
1925 auto tmp_file = ScopedTempFiles::CreateTempFile();
1926 auto reader = MoveRecordFile(tmp_file->path);
1927 if (!reader) {
1928 return false;
1929 }
1930 // Write new event attrs without regs and stacks fields.
1931 EventAttrIds attrs = reader->AttrSection();
1932 for (auto& attr : attrs) {
1933 ReplaceRegAndStackWithCallChain(attr.attr);
1934 }
1935 if (!record_file_writer_->WriteAttrSection(attrs)) {
1936 return false;
1937 }
1938
1939 sample_record_count_ = 0;
1940 auto callback = [this](std::unique_ptr<Record> record) {
1941 return SaveRecordAfterUnwinding(record.get());
1942 };
1943 return reader->ReadDataSection(callback);
1944 }
1945
JoinCallChains()1946 bool RecordCommand::JoinCallChains() {
1947 // 1. Prepare joined callchains.
1948 if (!callchain_joiner_->JoinCallChains()) {
1949 return false;
1950 }
1951 // 2. Move records from record_filename_ to a temporary file.
1952 auto tmp_file = ScopedTempFiles::CreateTempFile();
1953 auto reader = MoveRecordFile(tmp_file->path);
1954 if (!reader) {
1955 return false;
1956 }
1957
1958 // 3. Read records from the temporary file, and write record with joined call chains back
1959 // to record_filename_.
1960 auto record_callback = [&](std::unique_ptr<Record> r) {
1961 if (r->type() != PERF_RECORD_SAMPLE) {
1962 return record_file_writer_->WriteRecord(*r);
1963 }
1964 SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
1965 if (!sr.HasUserCallChain()) {
1966 return record_file_writer_->WriteRecord(sr);
1967 }
1968 pid_t pid;
1969 pid_t tid;
1970 CallChainJoiner::ChainType type;
1971 std::vector<uint64_t> ips;
1972 std::vector<uint64_t> sps;
1973 if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
1974 return false;
1975 }
1976 CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
1977 CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
1978 CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
1979 sr.UpdateUserCallChain(ips);
1980 return record_file_writer_->WriteRecord(sr);
1981 };
1982 return reader->ReadDataSection(record_callback);
1983 }
1984
LoadSymbolMapFile(int pid,const std::string & package,ThreadTree * thread_tree)1985 static void LoadSymbolMapFile(int pid, const std::string& package, ThreadTree* thread_tree) {
1986 // On Linux, symbol map files usually go to /tmp/perf-<pid>.map
1987 // On Android, there is no directory where any process can create files.
1988 // For now, use /data/local/tmp/perf-<pid>.map, which works for standalone programs,
1989 // and /data/data/<package>/perf-<pid>.map, which works for apps.
1990 auto path = package.empty()
1991 ? android::base::StringPrintf("/data/local/tmp/perf-%d.map", pid)
1992 : android::base::StringPrintf("/data/data/%s/perf-%d.map", package.c_str(), pid);
1993
1994 auto symbols = ReadSymbolMapFromFile(path);
1995 if (!symbols.empty()) {
1996 thread_tree->AddSymbolsForProcess(pid, &symbols);
1997 }
1998 }
1999
DumpAdditionalFeatures(const std::vector<std::string> & args)2000 bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
2001 // Read data section of perf.data to collect hit file information.
2002 thread_tree_.ClearThreadAndMap();
2003 bool kernel_symbols_available = false;
2004 std::string kallsyms;
2005 if (event_selection_set_.NeedKernelSymbol() && LoadKernelSymbols(&kallsyms)) {
2006 Dso::SetKallsyms(kallsyms);
2007 kernel_symbols_available = true;
2008 }
2009 std::unordered_set<int> loaded_symbol_maps;
2010 const std::vector<uint64_t>& auxtrace_offset = record_file_writer_->AuxTraceRecordOffsets();
2011 std::unordered_set<Dso*> debug_unwinding_files;
2012 bool failed_unwinding_sample = false;
2013
2014 auto callback = [&](const Record* r) {
2015 thread_tree_.Update(*r);
2016 if (r->type() == PERF_RECORD_SAMPLE) {
2017 auto sample = reinterpret_cast<const SampleRecord*>(r);
2018 // Symbol map files are available after recording. Load one for the process.
2019 if (loaded_symbol_maps.insert(sample->tid_data.pid).second) {
2020 LoadSymbolMapFile(sample->tid_data.pid, app_package_name_, &thread_tree_);
2021 }
2022 if (failed_unwinding_sample) {
2023 failed_unwinding_sample = false;
2024 CollectHitFileInfo(*sample, &debug_unwinding_files);
2025 } else {
2026 CollectHitFileInfo(*sample, nullptr);
2027 }
2028 } else if (r->type() == SIMPLE_PERF_RECORD_UNWINDING_RESULT) {
2029 failed_unwinding_sample = true;
2030 }
2031 };
2032
2033 if (map_record_thread_) {
2034 if (!map_record_thread_->Join()) {
2035 return false;
2036 }
2037 // If not dumping build id, we only need to read kernel maps, to dump kernel module addresses
2038 // in file feature section.
2039 if (!map_record_thread_->ReadMapRecords(callback, !dump_build_id_)) {
2040 return false;
2041 }
2042 }
2043
2044 // We don't need to read data section when recording ETM data and not need to dump build ids.
2045 bool read_data_section = true;
2046 if (event_selection_set_.HasAuxTrace() && !dump_build_id_) {
2047 read_data_section = false;
2048 }
2049
2050 if (read_data_section && !record_file_writer_->ReadDataSection(callback)) {
2051 return false;
2052 }
2053
2054 size_t feature_count = 5;
2055 if (dump_build_id_) {
2056 feature_count++;
2057 }
2058 if (branch_sampling_) {
2059 feature_count++;
2060 }
2061 if (!auxtrace_offset.empty()) {
2062 feature_count++;
2063 }
2064 if (keep_failed_unwinding_debug_info_) {
2065 feature_count += 2;
2066 }
2067 if (etm_branch_list_generator_) {
2068 feature_count++;
2069 }
2070 if (map_record_thread_) {
2071 feature_count++;
2072 }
2073 if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
2074 return false;
2075 }
2076 if (dump_build_id_ && !DumpBuildIdFeature()) {
2077 return false;
2078 }
2079 if (!DumpFileFeature()) {
2080 return false;
2081 }
2082 utsname uname_buf;
2083 if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
2084 PLOG(ERROR) << "uname() failed";
2085 return false;
2086 }
2087 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
2088 return false;
2089 }
2090 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
2091 return false;
2092 }
2093
2094 std::string exec_path = android::base::GetExecutablePath();
2095 if (exec_path.empty()) exec_path = "simpleperf";
2096 std::vector<std::string> cmdline;
2097 cmdline.push_back(exec_path);
2098 cmdline.push_back("record");
2099 cmdline.insert(cmdline.end(), args.begin(), args.end());
2100 if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
2101 return false;
2102 }
2103 if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
2104 return false;
2105 }
2106 if (!DumpMetaInfoFeature(kernel_symbols_available)) {
2107 return false;
2108 }
2109 if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
2110 return false;
2111 }
2112 if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
2113 return false;
2114 }
2115 if (etm_branch_list_generator_ && !DumpETMBranchListFeature()) {
2116 return false;
2117 }
2118 if (map_record_thread_ && !DumpInitMapFeature()) {
2119 return false;
2120 }
2121
2122 if (!record_file_writer_->EndWriteFeatures()) {
2123 return false;
2124 }
2125 return true;
2126 }
2127
DumpBuildIdFeature()2128 bool RecordCommand::DumpBuildIdFeature() {
2129 std::vector<BuildIdRecord> build_id_records;
2130 BuildId build_id;
2131 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
2132 for (Dso* dso : dso_v) {
2133 // For aux tracing, we don't know which binaries are traced.
2134 // So dump build ids for all binaries.
2135 if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
2136 continue;
2137 }
2138 if (GetBuildId(*dso, build_id)) {
2139 bool in_kernel = dso->type() == DSO_KERNEL || dso->type() == DSO_KERNEL_MODULE;
2140 build_id_records.emplace_back(in_kernel, UINT_MAX, build_id, dso->Path());
2141 }
2142 }
2143 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
2144 return false;
2145 }
2146 return true;
2147 }
2148
DumpFileFeature()2149 bool RecordCommand::DumpFileFeature() {
2150 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
2151 // To parse ETM data for kernel modules, we need to dump memory address for kernel modules.
2152 if (event_selection_set_.HasAuxTrace() && !event_selection_set_.ExcludeKernel()) {
2153 for (Dso* dso : dso_v) {
2154 if (dso->type() == DSO_KERNEL_MODULE) {
2155 dso->CreateDumpId();
2156 }
2157 }
2158 }
2159 return record_file_writer_->WriteFileFeatures(dso_v);
2160 }
2161
DumpMetaInfoFeature(bool kernel_symbols_available)2162 bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
2163 std::unordered_map<std::string, std::string> info_map = extra_meta_info_;
2164 info_map["simpleperf_version"] = GetSimpleperfVersion();
2165 info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
2166 info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
2167 // By storing event types information in perf.data, the readers of perf.data have the same
2168 // understanding of event types, even if they are on another machine.
2169 info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
2170 #if defined(__ANDROID__)
2171 info_map["product_props"] = android::base::StringPrintf(
2172 "%s:%s:%s", android::base::GetProperty("ro.product.manufacturer", "").c_str(),
2173 android::base::GetProperty("ro.product.model", "").c_str(),
2174 android::base::GetProperty("ro.product.name", "").c_str());
2175 info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
2176 info_map["android_sdk_version"] = android::base::GetProperty("ro.build.version.sdk", "");
2177 info_map["android_build_type"] = android::base::GetProperty("ro.build.type", "");
2178 info_map["android_build_fingerprint"] = android::base::GetProperty("ro.build.fingerprint", "");
2179 utsname un;
2180 if (uname(&un) == 0) {
2181 info_map["kernel_version"] = un.release;
2182 }
2183 if (!app_package_name_.empty()) {
2184 info_map["app_package_name"] = app_package_name_;
2185 if (IsRoot()) {
2186 info_map["app_type"] = GetAppType(app_package_name_);
2187 }
2188 }
2189 if (event_selection_set_.HasAuxTrace()) {
2190 // used by --exclude-perf in cmd_inject.cpp
2191 info_map["recording_process"] = std::to_string(getpid());
2192 }
2193 #endif
2194 info_map["clockid"] = clockid_;
2195 info_map["timestamp"] = std::to_string(time(nullptr));
2196 info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
2197 if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
2198 OfflineUnwinder::CollectMetaInfo(&info_map);
2199 }
2200 auto record_stat = event_selection_set_.GetRecordStat();
2201 info_map["record_stat"] = android::base::StringPrintf(
2202 "sample_record_count=%" PRIu64
2203 ",kernelspace_lost_records=%zu,userspace_lost_samples=%zu,"
2204 "userspace_lost_non_samples=%zu,userspace_truncated_stack_samples=%zu",
2205 sample_record_count_, record_stat.kernelspace_lost_records,
2206 record_stat.userspace_lost_samples, record_stat.userspace_lost_non_samples,
2207 record_stat.userspace_truncated_stack_samples);
2208
2209 return record_file_writer_->WriteMetaInfoFeature(info_map);
2210 }
2211
DumpDebugUnwindFeature(const std::unordered_set<Dso * > & dso_set)2212 bool RecordCommand::DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set) {
2213 DebugUnwindFeature debug_unwind_feature;
2214 debug_unwind_feature.reserve(dso_set.size());
2215 for (const Dso* dso : dso_set) {
2216 if (dso->type() != DSO_ELF_FILE) {
2217 continue;
2218 }
2219 const std::string& filename = dso->GetDebugFilePath();
2220 std::unique_ptr<ElfFile> elf = ElfFile::Open(filename);
2221 if (elf) {
2222 llvm::MemoryBuffer* buffer = elf->GetMemoryBuffer();
2223 debug_unwind_feature.resize(debug_unwind_feature.size() + 1);
2224 auto& debug_unwind_file = debug_unwind_feature.back();
2225 debug_unwind_file.path = filename;
2226 debug_unwind_file.size = buffer->getBufferSize();
2227 if (!record_file_writer_->WriteFeature(PerfFileFormat::FEAT_DEBUG_UNWIND_FILE,
2228 buffer->getBufferStart(), buffer->getBufferSize())) {
2229 return false;
2230 }
2231 } else {
2232 LOG(WARNING) << "failed to keep " << filename << " in debug_unwind_feature section";
2233 }
2234 }
2235 return record_file_writer_->WriteDebugUnwindFeature(debug_unwind_feature);
2236 }
2237
CollectHitFileInfo(const SampleRecord & r,std::unordered_set<Dso * > * dso_set)2238 void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set) {
2239 const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
2240 size_t kernel_ip_count;
2241 std::vector<uint64_t> ips = r.GetCallChain(&kernel_ip_count);
2242 if ((r.sample_type & PERF_SAMPLE_BRANCH_STACK) != 0) {
2243 for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
2244 const auto& item = r.branch_stack_data.stack[i];
2245 ips.push_back(item.from);
2246 ips.push_back(item.to);
2247 }
2248 }
2249 for (size_t i = 0; i < ips.size(); i++) {
2250 const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
2251 Dso* dso = map->dso;
2252 if (dump_symbols_) {
2253 const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr, &dso);
2254 if (!symbol->HasDumpId()) {
2255 dso->CreateSymbolDumpId(symbol);
2256 }
2257 }
2258 if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
2259 dso->CreateDumpId();
2260 }
2261 if (dso_set != nullptr) {
2262 dso_set->insert(dso);
2263 }
2264 }
2265 }
2266
DumpETMBranchListFeature()2267 bool RecordCommand::DumpETMBranchListFeature() {
2268 ETMBinaryMap binary_map = etm_branch_list_generator_->GetETMBinaryMap();
2269 std::string s;
2270 if (!ETMBinaryMapToString(binary_map, s)) {
2271 return false;
2272 }
2273 return record_file_writer_->WriteFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST, s.data(),
2274 s.size());
2275 }
2276
DumpInitMapFeature()2277 bool RecordCommand::DumpInitMapFeature() {
2278 if (!map_record_thread_->Join()) {
2279 return false;
2280 }
2281 auto callback = [&](const char* data, size_t size) {
2282 return record_file_writer_->WriteInitMapFeature(data, size);
2283 };
2284 return map_record_thread_->ReadMapRecordData(callback) &&
2285 record_file_writer_->FinishWritingInitMapFeature();
2286 }
2287
2288 } // namespace
2289
// If the text at `p` starts with `s`, advances `p` past it and returns true.
// Otherwise leaves `p` unchanged and returns false.
static bool ConsumeStr(const char*& p, const char* s) {
  const size_t prefix_len = strlen(s);
  if (strncmp(p, s, prefix_len) != 0) {
    return false;
  }
  p += prefix_len;
  return true;
}
2297
// Parses an unsigned integer at `p` with strtoull() using base 0, so the base is derived from
// the prefix of the number. On success, stores the value in `*addr`, advances `p` past the
// parsed text and returns true. If nothing can be parsed or strtoull() reports an error, `p` is
// left unchanged and false is returned; `*addr` is overwritten in either case.
static bool ConsumeAddr(const char*& p, uint64_t* addr) {
  char* parse_end = nullptr;
  // strtoull() reports errors only via errno, so clear it first.
  errno = 0;
  *addr = strtoull(p, &parse_end, 0);
  const bool parsed = (errno == 0) && (parse_end != p);
  if (parsed) {
    p = parse_end;
  }
  return parsed;
}
2308
2309 // To reduce function length, not all format errors are checked.
ParseOneAddrFilter(const std::string & s,std::vector<AddrFilter> * filters)2310 static bool ParseOneAddrFilter(const std::string& s, std::vector<AddrFilter>* filters) {
2311 std::vector<std::string> args = android::base::Split(s, " ");
2312 if (args.size() != 2) {
2313 return false;
2314 }
2315
2316 uint64_t addr1;
2317 uint64_t addr2;
2318 uint64_t off1;
2319 uint64_t off2;
2320 std::string path;
2321
2322 if (auto p = s.data(); ConsumeStr(p, "start") && ConsumeAddr(p, &addr1)) {
2323 if (*p == '\0') {
2324 // start <kernel_addr>
2325 filters->emplace_back(AddrFilter::KERNEL_START, addr1, 0, "");
2326 return true;
2327 }
2328 if (ConsumeStr(p, "@") && *p != '\0') {
2329 // start <vaddr>@<file_path>
2330 if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
2331 filters->emplace_back(AddrFilter::FILE_START, off1, 0, path);
2332 return true;
2333 }
2334 }
2335 }
2336 if (auto p = s.data(); ConsumeStr(p, "stop") && ConsumeAddr(p, &addr1)) {
2337 if (*p == '\0') {
2338 // stop <kernel_addr>
2339 filters->emplace_back(AddrFilter::KERNEL_STOP, addr1, 0, "");
2340 return true;
2341 }
2342 if (ConsumeStr(p, "@") && *p != '\0') {
2343 // stop <vaddr>@<file_path>
2344 if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
2345 filters->emplace_back(AddrFilter::FILE_STOP, off1, 0, path);
2346 return true;
2347 }
2348 }
2349 }
2350 if (auto p = s.data(); ConsumeStr(p, "filter") && ConsumeAddr(p, &addr1) && ConsumeStr(p, "-") &&
2351 ConsumeAddr(p, &addr2)) {
2352 if (*p == '\0') {
2353 // filter <kernel_addr_start>-<kernel_addr_end>
2354 filters->emplace_back(AddrFilter::KERNEL_RANGE, addr1, addr2 - addr1, "");
2355 return true;
2356 }
2357 if (ConsumeStr(p, "@") && *p != '\0') {
2358 // filter <vaddr_start>-<vaddr_end>@<file_path>
2359 if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) &&
2360 elf->VaddrToOff(addr2, &off2) && Realpath(p, &path)) {
2361 filters->emplace_back(AddrFilter::FILE_RANGE, off1, off2 - off1, path);
2362 return true;
2363 }
2364 }
2365 }
2366 if (auto p = s.data(); ConsumeStr(p, "filter") && *p != '\0') {
2367 // filter <file_path>
2368 path = android::base::Trim(p);
2369 if (auto elf = ElfFile::Open(path); elf) {
2370 for (const ElfSegment& seg : elf->GetProgramHeader()) {
2371 if (seg.is_executable) {
2372 filters->emplace_back(AddrFilter::FILE_RANGE, seg.file_offset, seg.file_size, path);
2373 }
2374 }
2375 return true;
2376 }
2377 }
2378 return false;
2379 }
2380
ParseAddrFilterOption(const std::string & s)2381 std::vector<AddrFilter> ParseAddrFilterOption(const std::string& s) {
2382 std::vector<AddrFilter> filters;
2383 for (const auto& str : android::base::Split(s, ",")) {
2384 if (!ParseOneAddrFilter(str, &filters)) {
2385 LOG(ERROR) << "failed to parse addr filter: " << str;
2386 return {};
2387 }
2388 }
2389 return filters;
2390 }
2391
RegisterRecordCommand()2392 void RegisterRecordCommand() {
2393 RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
2394 }
2395
2396 } // namespace simpleperf
2397