/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/traced/probes/ftrace/ftrace_controller.h"

#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdint>

#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/event_info_constants.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_config_utils.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"
#include "src/traced/probes/ftrace/vendor_tracepoints.h"

namespace perfetto {
namespace {

constexpr uint32_t kDefaultTickPeriodMs = 100;
constexpr uint32_t kPollBackingTickPeriodMs = 1000;
constexpr uint32_t kMinTickPeriodMs = 1;
constexpr uint32_t kMaxTickPeriodMs = 1000 * 60;
constexpr int kPollRequiredMajorVersion = 6;
constexpr int kPollRequiredMinorVersion = 9;

// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before continuing reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu

bool WriteToFile(const char* path, const char* str) {
  auto fd = base::OpenFile(path, O_WRONLY);
  if (!fd)
    return false;
  const size_t str_len = strlen(str);
  return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
}

bool ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
  return !!fd;
}

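// Parses the "now ts" field out of a per_cpu/cpuX/stats file and converts it
// from (fractional) seconds to nanoseconds. Used to snapshot the ftrace clock.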
std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
  PERFETTO_CHECK(cpu_stats_fd);

  char buf[512];
  ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
  if (res <= 0)
    return std::nullopt;
  buf[res] = '\0';

  FtraceCpuStats stats{};
  DumpCpuStats(buf, &stats);
  return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
}

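// On Android, discovers vendor-defined atrace categories and the tracepoints
// they map to, preferring the static categories file and falling back to the
// atrace HAL. On other platforms this returns an empty map.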
std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
    FtraceProcfs* tracefs) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
  if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
    std::map<std::string, std::vector<GroupAndName>> vendor_evts;
    base::Status status =
        vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
            vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
    if (!status.ok()) {
      PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
    }
    return vendor_evts;
  } else {
    AtraceHalWrapper hal;
    return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
  }
#else
  base::ignore_result(tracefs);
  return {};
#endif
}

struct AndroidGkiVersion {
  uint64_t version = 0;
  uint64_t patch_level = 0;
  uint64_t sub_level = 0;
  uint64_t release = 0;
  uint64_t kmi_gen = 0;
};

#define ANDROID_GKI_UNAME_FMT \
  "%" PRIu64 ".%" PRIu64 ".%" PRIu64 "-android%" PRIu64 "-%" PRIu64

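// Parses an Android GKI kernel release string of the form
// "<version>.<patch_level>.<sub_level>-android<release>-<kmi_gen>", for
// example (illustrative) "6.1.86-android14-11".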
std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
  AndroidGkiVersion v = {};
  if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level, &v.sub_level,
             &v.release, &v.kmi_gen) != 5) {
    return std::nullopt;
  }
  return v;
}

}  // namespace

// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is in, so avoid
// allocations as far as possible.
bool HardResetFtraceState() {
  for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
    std::string prefix(*item);
    PERFETTO_CHECK(base::EndsWith(prefix, "/"));
    bool res = true;
    res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
    res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
    // Not checking success because these files might not be accessible on
    // older or release builds of Android:
    WriteToFile((prefix + "events/enable").c_str(), "0");
    WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
    WriteToFile((prefix + "current_tracer").c_str(), "nop");
    res &= ClearFile((prefix + "trace").c_str());
    if (res)
      return true;
  }
  return false;
}

// static
std::unique_ptr<FtraceController> FtraceController::Create(
    base::TaskRunner* runner,
    Observer* observer) {
  std::unique_ptr<FtraceProcfs> ftrace_procfs =
      FtraceProcfs::CreateGuessingMountPoint("");
  if (!ftrace_procfs)
    return nullptr;

  std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table)
    return nullptr;

  auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();

  std::map<std::string, std::vector<GroupAndName>> vendor_evts =
      GetAtraceVendorEvents(ftrace_procfs.get());

  SyscallTable syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
      std::move(syscalls), vendor_evts);
  return std::unique_ptr<FtraceController>(new FtraceController(
      std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
      std::move(muxer), runner, observer));
}

FtraceController::FtraceController(
    std::unique_ptr<FtraceProcfs> ftrace_procfs,
    std::unique_ptr<ProtoTranslationTable> table,
    std::unique_ptr<AtraceWrapper> atrace_wrapper,
    std::unique_ptr<FtraceConfigMuxer> muxer,
    base::TaskRunner* task_runner,
    Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      atrace_wrapper_(std::move(atrace_wrapper)),
      primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
      weak_factory_(this) {}

FtraceController::~FtraceController() {
  while (!data_sources_.empty()) {
    RemoveDataSource(*data_sources_.begin());
  }
  PERFETTO_DCHECK(data_sources_.empty());
  PERFETTO_DCHECK(primary_.started_data_sources.empty());
  PERFETTO_DCHECK(primary_.cpu_readers.empty());
  PERFETTO_DCHECK(secondary_instances_.empty());
}

uint64_t FtraceController::NowMs() const {
  return static_cast<uint64_t>(base::GetWallTimeMs().count());
}

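// Invokes |fn| on the primary tracefs instance, then on every secondary
// instance.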
template <typename F>
void FtraceController::ForEachInstance(F fn) {
  fn(&primary_);
  for (auto& kv : secondary_instances_) {
    fn(kv.second.get());
  }
}

void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
                                     const std::string& instance_name) {
  if (buffer_watermark_support_ == PollSupport::kUntested) {
    buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
  }

  // If instance is already active, then at most we need to update the buffer
  // poll callbacks. The periodic |ReadTick| will pick up any updates to the
  // period the next time it executes.
  if (instance->started_data_sources.size() > 1) {
    UpdateBufferWatermarkWatches(instance, instance_name);
    return;
  }

  // Lazily allocate the memory used for reading & parsing ftrace. In the case
  // of multiple ftrace instances, this might already be valid.
  parsing_mem_.AllocateIfNeeded();

  const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
  size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
  PERFETTO_CHECK(instance->cpu_readers.empty());
  instance->cpu_readers.reserve(num_cpus);
  for (size_t cpu = 0; cpu < num_cpus; cpu++) {
    instance->cpu_readers.emplace_back(
        cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
        instance->table.get(), &symbolizer_, ftrace_clock,
        &ftrace_clock_snapshot_);
  }

  // Special case for primary instance: if not using the boot clock, take
  // manual clock snapshots so that the trace parser can do a best effort
  // conversion back to boot. This is primarily for old kernels that predate
  // boot support, and therefore default to "global" clock.
  if (instance == &primary_ &&
      ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
    cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
    MaybeSnapshotFtraceClock();
  }

  // Set up poll callbacks for the buffers if requested by at least one DS.
  UpdateBufferWatermarkWatches(instance, instance_name);

  // Start a new repeating read task (even if there is already one posted due
  // to a different ftrace instance). Any old tasks will stop due to generation
  // checks.
  auto generation = ++tick_generation_;
  auto tick_period_ms = GetTickPeriodMs();
  auto weak_this = weak_factory_.GetWeakPtr();
  task_runner_->PostDelayedTask(
      [weak_this, generation] {
        if (weak_this)
          weak_this->ReadTick(generation);
      },
      tick_period_ms - (NowMs() % tick_period_ms));
}

// We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
// we iterate over all per-cpu buffers, parse their contents, and then write out
// the serialized packets. This is handled by |CpuReader| instances, which
// attempt to read from their respective per-cpu buffer fd until they catch up
// to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the end
// of the immediate queue (letting other enqueued tasks run before continuing).
// Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
void FtraceController::ReadTick(int generation) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_READ_TICK);
  if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
    return;
  }
  MaybeSnapshotFtraceClock();

  // Read all per-cpu buffers.
  bool all_cpus_done = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    all_cpus_done &= ReadPassForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  auto weak_this = weak_factory_.GetWeakPtr();
  if (!all_cpus_done) {
    PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
    task_runner_->PostTask([weak_this, generation] {
      if (weak_this)
        weak_this->ReadTick(generation);
    });
  } else {
    // Done until next period.
    auto tick_period_ms = GetTickPeriodMs();
    task_runner_->PostDelayedTask(
        [weak_this, generation] {
          if (weak_this)
            weak_this->ReadTick(generation);
        },
        tick_period_ms - (NowMs() % tick_period_ms));
  }

#if PERFETTO_DCHECK_IS_ON()
  // OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
  // all metadata, including the |kernel_addrs| map for symbolization.
  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
      PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
      PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
    }
  });
#endif
}

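// Performs one bounded read pass over all per-cpu buffers of |instance|.
// Returns false if at least one cpu hit the per-pass page limit, i.e. there is
// likely more data to read.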
bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return true;

  bool all_cpus_done = true;
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    size_t max_pages = kMaxPagesPerCpuPerReadTick;
    size_t pages_read = instance->cpu_readers[i].ReadCycle(
        &parsing_mem_, max_pages, instance->started_data_sources);
    PERFETTO_DCHECK(pages_read <= max_pages);
    if (pages_read == max_pages) {
      all_cpus_done = false;
    }
  }
  return all_cpus_done;
}

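// Returns the period of the repeating read task: the smallest drain_period_ms
// across all started data sources, or a default if none set one (a longer
// default if every instance is also draining via buffer watermark polling).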
uint32_t FtraceController::GetTickPeriodMs() {
  if (data_sources_.empty())
    return kDefaultTickPeriodMs;
  uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
  uint32_t min_period_ms = kUnsetPeriod;
  bool using_poll = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    using_poll &= instance->buffer_watches_posted;
    for (FtraceDataSource* ds : instance->started_data_sources) {
      if (ds->config().has_drain_period_ms()) {
        min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
      }
    }
  });

  // None of the active data sources requested an explicit tick period. The
  // historical default is 100ms, but if we know that all instances are also
  // using buffer watermark polling, we can raise it. We don't disable the
  // tick entirely as it spreads the read work more evenly, and ensures procfs
  // scrapes of seen TIDs are not too stale.
  if (min_period_ms == kUnsetPeriod) {
    return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
  }

  if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
    PERFETTO_LOG(
        "drain_period_ms was %u, should be between %u and %u. "
        "Falling back onto a default.",
        min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
    return kDefaultTickPeriodMs;
  }
  return min_period_ms;
}

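// Installs fd watches on each per-cpu trace_pipe_raw if at least one started
// data source requested watermark-based draining (drain_buffer_percent) and
// the kernel supports polling on the buffers.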
void FtraceController::UpdateBufferWatermarkWatches(
    FtraceInstanceState* instance,
    const std::string& instance_name) {
  PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
  if (buffer_watermark_support_ == PollSupport::kUnsupported)
    return;

  bool requested_poll = false;
  for (const FtraceDataSource* ds : instance->started_data_sources) {
    requested_poll |= ds->config().has_drain_buffer_percent();
  }

  if (!requested_poll || instance->buffer_watches_posted)
    return;

  auto weak_this = weak_factory_.GetWeakPtr();
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, i,
                                         /*repoll_watermark=*/true);
    });
  }
  instance->buffer_watches_posted = true;
}

void FtraceController::RemoveBufferWatermarkWatches(
    FtraceInstanceState* instance) {
  if (!instance->buffer_watches_posted)
    return;

  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->RemoveFileDescriptorWatch(fd);
  }
  instance->buffer_watches_posted = false;
}

// TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
// a continuation. It's a tradeoff between procfs scrape freshness and urgency
// to drain ftrace kernel buffers.
void FtraceController::OnBufferPastWatermark(std::string instance_name,
                                             size_t cpu,
                                             bool repoll_watermark) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_BUFFER_WATERMARK);

  // Instance might have been stopped before this callback runs.
  FtraceInstanceState* instance = GetInstance(instance_name);
  if (!instance || cpu >= instance->cpu_readers.size())
    return;

  // Repoll all per-cpu buffers with zero timeout to confirm that at least
  // one is still past the watermark. This might not be true if a different
  // callback / readtick / flush did a read pass before this callback reached
  // the front of the task runner queue.
  if (repoll_watermark) {
    size_t num_cpus = instance->cpu_readers.size();
    std::vector<struct pollfd> pollfds(num_cpus);
    for (size_t i = 0; i < num_cpus; i++) {
      pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
      pollfds[i].events = POLLIN;
    }
    int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
    if (r < 0) {
      PERFETTO_DPLOG("poll failed");
      return;
    } else if (r == 0) {  // no buffers past the watermark -> we're done.
      return;
    }
    // Check that at least one fd is readable, as some poll results might be
    // POLLERR, as seen in cases with offlined cores. It's still fine to
    // attempt reading from those buffers as CpuReader will handle the ENODEV.
    bool has_readable_fd = false;
    for (size_t i = 0; i < num_cpus; i++) {
      has_readable_fd |= (pollfds[i].revents & POLLIN);
    }
    if (!has_readable_fd) {
      return;
    }
  }

  MaybeSnapshotFtraceClock();
  bool all_cpus_done = ReadPassForInstance(instance);
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
  if (!all_cpus_done) {
    // More data to be read, but we want to let other task_runner tasks run.
    // Repost a continuation task.
    auto weak_this = weak_factory_.GetWeakPtr();
    task_runner_->PostTask([weak_this, instance_name, cpu] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, cpu,
                                         /*repoll_watermark=*/false);
    });
  }
}

void FtraceController::Flush(FlushRequestID flush_id) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_FLUSH);

  ForEachInstance([&](FtraceInstanceState* instance) {  // for clang-format
    FlushForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      ds->OnFtraceFlushComplete(flush_id);
    }
  });
}

void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return;

  // Read all cpus in one go, limiting the per-cpu read amount to make sure we
  // don't get stuck chasing the writer if there's a very high bandwidth of
  // events.
  size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
                                       instance->started_data_sources);
  }
}

// We are not implicitly flushing on Stop. The tracing service is supposed to
// ask for an explicit flush before stopping, unless it needs to perform a
// non-graceful stop.
void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
  if (!instance->started_data_sources.empty())
    return;

  RemoveBufferWatermarkWatches(instance);
  instance->cpu_readers.clear();
  if (instance == &primary_) {
    cpu_zero_stats_fd_.reset();
  }
  // The muxer cannot change current_tracer until we close the trace pipe fds
  // (i.e. the per-cpu readers), hence the explicit request here.
  instance->ftrace_config_muxer->ResetCurrentTracer();

  DestroyIfUnusedSeconaryInstance(instance);

  // Clean up global state if done with all data sources.
  if (!data_sources_.empty())
    return;

  if (!retain_ksyms_on_stop_) {
    symbolizer_.Destroy();
  }
  retain_ksyms_on_stop_ = false;

  // Note: might have never been allocated if data sources were rejected.
  parsing_mem_.Release();
}

bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  if (!ValidConfig(data_source->config()))
    return false;

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  if (!instance)
    return false;

  // Note: from this point onwards, we must not leak a possibly created
  // instance if returning early.

  FtraceConfigId config_id = next_cfg_id_++;
  if (!instance->ftrace_config_muxer->SetupConfig(
          config_id, data_source->config(),
          data_source->mutable_setup_errors())) {
    DestroyIfUnusedSeconaryInstance(instance);
    return false;
  }

  const FtraceDataSourceConfig* ds_config =
      instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, ds_config);
  return true;
}

bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);
  const std::string& instance_name = data_source->config().instance_name();
  FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
  PERFETTO_CHECK(instance);

  if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
    return false;
  instance->started_data_sources.insert(data_source);
  StartIfNeeded(instance, instance_name);

  // Parse kernel symbols if required by the config. This can be an expensive
  // operation (cpu-bound for 500ms+), so delay the StartDataSource
  // acknowledgement until after we're done. This lets a consumer wait for the
  // expensive work to be done by waiting on the "all data sources started"
  // fence. This helps isolate the effects of the cpu-bound work on
  // frequency scaling of cpus when recording benchmarks (b/236143653).
  // Note that we're already recording data into the kernel ftrace
  // buffers while doing the symbol parsing.
  if (data_source->config().symbolize_ksyms()) {
    symbolizer_.GetOrCreateKernelSymbolMap();
    // If at least one config sets the KSYMS_RETAIN flag, keep the ksyms map
    // around in StopIfNeeded().
    const auto KRET = FtraceConfig::KSYMS_RETAIN;
    retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
  }

  return true;
}

void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // can happen if AddDataSource failed

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  PERFETTO_CHECK(instance);

  instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
  instance->started_data_sources.erase(data_source);
  StopIfNeeded(instance);
}

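// Parses the tracefs kprobe_profile contents, where each line is expected to
// be "<event name> <hit count> <miss count>", and sums hits and misses across
// all kprobe events.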
bool DumpKprobeStats(const std::string& text, FtraceStats* ftrace_stats) {
  int64_t hits = 0;
  int64_t misses = 0;

  base::StringSplitter line(std::move(text), '\n');
  while (line.Next()) {
    base::StringSplitter tok(line.cur_token(), line.cur_token_size() + 1, ' ');

    if (!tok.Next())
      return false;
    // Skip the event name field

    if (!tok.Next())
      return false;
    hits += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));

    if (!tok.Next())
      return false;
    misses += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));
  }

  ftrace_stats->kprobe_stats.hits = hits;
  ftrace_stats->kprobe_stats.misses = misses;

  return true;
}

void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
                                       FtraceStats* stats_out) {
  FtraceInstanceState* instance =
      GetInstance(data_source->config().instance_name());
  PERFETTO_DCHECK(instance);
  if (!instance)
    return;

  DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
  if (symbolizer_.is_valid()) {
    auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
    stats_out->kernel_symbols_parsed =
        static_cast<uint32_t>(symbol_map->num_syms());
    stats_out->kernel_symbols_mem_kb =
        static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
  }

  if (data_source->parsing_config()->kprobes.size() > 0) {
    DumpKprobeStats(instance->ftrace_procfs.get()->ReadKprobeStats(),
                    stats_out);
  }
}

void FtraceController::MaybeSnapshotFtraceClock() {
  if (!cpu_zero_stats_fd_)
    return;

  auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
  PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);

  // Snapshot the boot clock *before* reading CPU stats so that the two clocks
  // are as close together as possible (i.e. if it were the other way round,
  // we'd skew by the cost of string parsing).
  ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();

  // A value of zero will cause this snapshot to be skipped.
  ftrace_clock_snapshot_.ftrace_clock_ts =
      ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
}

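// Buffer watermark polling is considered usable only if the kernel is recent
// enough to have the ring-buffer poll fixes, buffer_percent is writable, and
// poll() on trace_pipe_raw does not report errors.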
FtraceController::PollSupport
FtraceController::VerifyKernelSupportForBufferWatermark() {
  struct utsname uts = {};
  if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
    return PollSupport::kUnsupported;
  if (!PollSupportedOnKernelVersion(uts.release))
    return PollSupport::kUnsupported;

  // Check that buffer_percent exists and is writable.
  auto* tracefs = primary_.ftrace_procfs.get();
  uint32_t current = tracefs->ReadBufferPercent();
  if (!tracefs->SetBufferPercent(current ? current : 50)) {
    return PollSupport::kUnsupported;
  }

  // Check that polling on per_cpu/cpu0/trace_pipe_raw doesn't report errors.
  base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
  struct pollfd pollset = {};
  pollset.fd = fd.get();
  pollset.events = POLLIN;
  int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
  if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
    return PollSupport::kUnsupported;
  }
  return PollSupport::kSupported;
}

// Check kernel version since the poll implementation has historical bugs.
// We're looking for at least 6.9 for the following:
//   ffe3986fece6 ring-buffer: Only update pages_touched when a new page...
// static
bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
  int major = 0, minor = 0;
  if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
    return false;
  }
  if (major < kPollRequiredMajorVersion ||
      (major == kPollRequiredMajorVersion &&
       minor < kPollRequiredMinorVersion)) {
    // Android: opportunistically detect a few select GKI kernels that are
    // known to have the fixes.
    std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
    if (!gki.has_value())
      return false;
    // android14-6.1.86 or higher sublevel:
    //   2d5f12de4cf5 ring-buffer: Only update pages_touched when a new page...
    // android15-6.6.27 or higher sublevel:
    //   a9cd92bc051f ring-buffer: Only update pages_touched when a new page...
    bool gki_patched = (gki->release == 14 && gki->version == 6 &&
                        gki->patch_level == 1 && gki->sub_level >= 86) ||
                       (gki->release == 15 && gki->version == 6 &&
                        gki->patch_level == 6 && gki->sub_level >= 27);
    return gki_patched;
  }
  return true;
}

size_t FtraceController::GetStartedDataSourcesCount() {
  size_t cnt = 0;
  ForEachInstance([&](FtraceInstanceState* instance) {
    cnt += instance->started_data_sources.size();
  });
  return cnt;
}

FtraceController::FtraceInstanceState::FtraceInstanceState(
    std::unique_ptr<FtraceProcfs> ft,
    std::unique_ptr<ProtoTranslationTable> ptt,
    std::unique_ptr<FtraceConfigMuxer> fcm)
    : ftrace_procfs(std::move(ft)),
      table(std::move(ptt)),
      ftrace_config_muxer(std::move(fcm)) {}

FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
    const std::string& instance_name) {
  FtraceInstanceState* maybe_existing = GetInstance(instance_name);
  if (maybe_existing)
    return maybe_existing;

  PERFETTO_DCHECK(!instance_name.empty());
  std::unique_ptr<FtraceInstanceState> instance =
      CreateSecondaryInstance(instance_name);
  if (!instance)
    return nullptr;

  auto it_and_inserted = secondary_instances_.emplace(
      std::piecewise_construct, std::forward_as_tuple(instance_name),
      std::forward_as_tuple(std::move(instance)));
  PERFETTO_CHECK(it_and_inserted.second);
  return it_and_inserted.first->second.get();
}

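// Returns the primary instance for an empty name, the matching secondary
// instance if it exists, or nullptr otherwise.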
FtraceController::FtraceInstanceState* FtraceController::GetInstance(
    const std::string& instance_name) {
  if (instance_name.empty())
    return &primary_;

  auto it = secondary_instances_.find(instance_name);
  return it != secondary_instances_.end() ? it->second.get() : nullptr;
}

void FtraceController::DestroyIfUnusedSeconaryInstance(
    FtraceInstanceState* instance) {
  if (instance == &primary_)
    return;
  for (auto it = secondary_instances_.begin(); it != secondary_instances_.end();
       ++it) {
    if (it->second.get() == instance &&
        instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
      // no data sources left referencing this secondary instance
      secondary_instances_.erase(it);
      return;
    }
  }
  PERFETTO_FATAL("Bug in ftrace instance lifetimes");
}

std::unique_ptr<FtraceController::FtraceInstanceState>
FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
  std::optional<std::string> instance_path = AbsolutePathForInstance(
      primary_.ftrace_procfs->GetRootPath(), instance_name);
  if (!instance_path.has_value()) {
    PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
                  instance_name.c_str());
    return nullptr;
  }

  auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
  if (!ftrace_procfs) {
    PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  auto table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table) {
    PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  // Secondary instances don't support atrace or the vendor tracepoint HAL.
  std::map<std::string, std::vector<GroupAndName>> vendor_evts;

  auto syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
      std::move(syscalls), vendor_evts,
      /* secondary_instance= */ true);
  return std::make_unique<FtraceInstanceState>(
      std::move(ftrace_procfs), std::move(table), std::move(muxer));
}

// TODO(rsavitski): we want to eventually add support for the default
// (primary_) tracefs path to be an instance itself, at which point we'll need
// to be careful to distinguish the tracefs mount point from the default
// instance path.
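// For example (illustrative paths): a tracefs root of "/sys/kernel/tracing/"
// and a config name of "foo" map to "/sys/kernel/tracing/instances/foo/".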
// static
std::optional<std::string> FtraceController::AbsolutePathForInstance(
    const std::string& tracefs_root,
    const std::string& raw_cfg_name) {
  if (base::Contains(raw_cfg_name, '/') ||
      base::StartsWith(raw_cfg_name, "..")) {
    return std::nullopt;
  }

  // ARM64 pKVM hypervisor tracing emulates an instance, but is not under
  // instances/; we special-case that name for now.
  if (raw_cfg_name == "hyp") {
    std::string hyp_path = tracefs_root + "hyp/";
    PERFETTO_LOG(
        "Config specified reserved \"hyp\" instance name, using %s for events.",
        hyp_path.c_str());
    return std::make_optional(hyp_path);
  }

  return tracefs_root + "instances/" + raw_cfg_name + "/";
}

FtraceController::Observer::~Observer() = default;

}  // namespace perfetto