xref: /aosp_15_r20/external/perfetto/src/traced/probes/ftrace/ftrace_controller.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18 
19 #include <fcntl.h>
20 #include <poll.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/utsname.h>
25 #include <sys/wait.h>
26 #include <unistd.h>
27 #include <cstdint>
#include <cinttypes>  // PRIu64 (used by ANDROID_GKI_UNAME_FMT below)
#include <cstdio>     // sscanf
#include <cstdlib>    // std::strtoll
28 
29 #include <limits>
30 #include <memory>
31 #include <optional>
32 #include <string>
33 #include <tuple>
34 #include <utility>
35 
36 #include "perfetto/base/build_config.h"
37 #include "perfetto/base/logging.h"
38 #include "perfetto/base/time.h"
39 #include "perfetto/ext/base/file_utils.h"
40 #include "perfetto/ext/base/metatrace.h"
41 #include "perfetto/ext/base/scoped_file.h"
42 #include "perfetto/ext/base/string_splitter.h"
43 #include "perfetto/ext/base/string_utils.h"
44 #include "perfetto/ext/tracing/core/trace_writer.h"
45 #include "src/kallsyms/kernel_symbol_map.h"
46 #include "src/kallsyms/lazy_kernel_symbolizer.h"
47 #include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
48 #include "src/traced/probes/ftrace/cpu_reader.h"
49 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
50 #include "src/traced/probes/ftrace/event_info.h"
51 #include "src/traced/probes/ftrace/event_info_constants.h"
52 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
53 #include "src/traced/probes/ftrace/ftrace_config_utils.h"
54 #include "src/traced/probes/ftrace/ftrace_data_source.h"
55 #include "src/traced/probes/ftrace/ftrace_metadata.h"
56 #include "src/traced/probes/ftrace/ftrace_procfs.h"
57 #include "src/traced/probes/ftrace/ftrace_stats.h"
58 #include "src/traced/probes/ftrace/proto_translation_table.h"
59 #include "src/traced/probes/ftrace/vendor_tracepoints.h"
60 
61 namespace perfetto {
62 namespace {
63 
64 constexpr uint32_t kDefaultTickPeriodMs = 100;
65 constexpr uint32_t kPollBackingTickPeriodMs = 1000;
66 constexpr uint32_t kMinTickPeriodMs = 1;
67 constexpr uint32_t kMaxTickPeriodMs = 1000 * 60;
68 constexpr int kPollRequiredMajorVersion = 6;
69 constexpr int kPollRequiredMinorVersion = 9;
70 
71 // Read at most this many pages of data per cpu per read task. If we hit this
72 // limit on at least one cpu, we stop and repost the read task, letting other
73 // tasks get some cpu time before continuing reading.
74 constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu
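// Rough sketch of the arithmetic behind the "1 MB per cpu" note (assuming the
// usual 4 KiB kernel page size): 256 pages * 4 KiB = 1 MiB per cpu per read
// task, so e.g. an 8-cpu device parses at most ~8 MiB of ftrace data before
// the task yields and reposts itself.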
75 
76 bool WriteToFile(const char* path, const char* str) {
77   auto fd = base::OpenFile(path, O_WRONLY);
78   if (!fd)
79     return false;
80   const size_t str_len = strlen(str);
81   return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
82 }
83 
84 bool ClearFile(const char* path) {
85   auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
86   return !!fd;
87 }
88 
89 std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
90   PERFETTO_CHECK(cpu_stats_fd);
91 
92   char buf[512];
93   ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
94   if (res <= 0)
95     return std::nullopt;
96   buf[res] = '\0';
97 
98   FtraceCpuStats stats{};
99   DumpCpuStats(buf, &stats);
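  // The parsed now_ts value is in (fractional) seconds, as reported by the
  // per-cpu stats file; convert it to nanoseconds so it pairs with the boot
  // clock snapshot taken in MaybeSnapshotFtraceClock().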
100   return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
101 }
102 
103 std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
104     FtraceProcfs* tracefs) {
105 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
106   if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
107     std::map<std::string, std::vector<GroupAndName>> vendor_evts;
108     base::Status status =
109         vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
110             vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
111     if (!status.ok()) {
112       PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
113     }
114     return vendor_evts;
115   } else {
116     AtraceHalWrapper hal;
117     return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
118   }
119 #else
120   base::ignore_result(tracefs);
121   return {};
122 #endif
123 }
124 
125 struct AndroidGkiVersion {
126   uint64_t version = 0;
127   uint64_t patch_level = 0;
128   uint64_t sub_level = 0;
129   uint64_t release = 0;
130   uint64_t kmi_gen = 0;
131 };
132 
133 #define ANDROID_GKI_UNAME_FMT \
134   "%" PRIu64 ".%" PRIu64 ".%" PRIu64 "-android%" PRIu64 "-%" PRIu64
135 
136 std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
137   AndroidGkiVersion v = {};
138   if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level, &v.sub_level,
139              &v.release, &v.kmi_gen) != 5) {
140     return std::nullopt;
141   }
142   return v;
143 }
144 
145 }  // namespace
146 
147 // Method of last resort to reset ftrace state.
148 // We don't know what state the rest of the system and this process are in,
149 // so avoid allocations as far as possible.
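// Iterates over the well-known tracefs mount points (FtraceProcfs::kTracingPaths,
// typically /sys/kernel/tracing/ and /sys/kernel/debug/tracing/) and restores a
// minimal default state in each.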
150 bool HardResetFtraceState() {
151   for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
152     std::string prefix(*item);
153     PERFETTO_CHECK(base::EndsWith(prefix, "/"));
154     bool res = true;
155     res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
156     res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
157     // Not checking success because these files might not be accessible on
158     // older or release builds of Android:
159     WriteToFile((prefix + "events/enable").c_str(), "0");
160     WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
161     WriteToFile((prefix + "current_tracer").c_str(), "nop");
162     res &= ClearFile((prefix + "trace").c_str());
163     if (res)
164       return true;
165   }
166   return false;
167 }
168 
169 // static
170 std::unique_ptr<FtraceController> FtraceController::Create(
171     base::TaskRunner* runner,
172     Observer* observer) {
173   std::unique_ptr<FtraceProcfs> ftrace_procfs =
174       FtraceProcfs::CreateGuessingMountPoint("");
175   if (!ftrace_procfs)
176     return nullptr;
177 
178   std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
179       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
180   if (!table)
181     return nullptr;
182 
183   auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();
184 
185   std::map<std::string, std::vector<GroupAndName>> vendor_evts =
186       GetAtraceVendorEvents(ftrace_procfs.get());
187 
188   SyscallTable syscalls = SyscallTable::FromCurrentArch();
189 
190   auto muxer = std::make_unique<FtraceConfigMuxer>(
191       ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
192       std::move(syscalls), vendor_evts);
193   return std::unique_ptr<FtraceController>(new FtraceController(
194       std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
195       std::move(muxer), runner, observer));
196 }
197 
198 FtraceController::FtraceController(
199     std::unique_ptr<FtraceProcfs> ftrace_procfs,
200     std::unique_ptr<ProtoTranslationTable> table,
201     std::unique_ptr<AtraceWrapper> atrace_wrapper,
202     std::unique_ptr<FtraceConfigMuxer> muxer,
203     base::TaskRunner* task_runner,
204     Observer* observer)
205     : task_runner_(task_runner),
206       observer_(observer),
207       atrace_wrapper_(std::move(atrace_wrapper)),
208       primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
209       weak_factory_(this) {}
210 
211 FtraceController::~FtraceController() {
212   while (!data_sources_.empty()) {
213     RemoveDataSource(*data_sources_.begin());
214   }
215   PERFETTO_DCHECK(data_sources_.empty());
216   PERFETTO_DCHECK(primary_.started_data_sources.empty());
217   PERFETTO_DCHECK(primary_.cpu_readers.empty());
218   PERFETTO_DCHECK(secondary_instances_.empty());
219 }
220 
221 uint64_t FtraceController::NowMs() const {
222   return static_cast<uint64_t>(base::GetWallTimeMs().count());
223 }
224 
225 template <typename F>
226 void FtraceController::ForEachInstance(F fn) {
227   fn(&primary_);
228   for (auto& kv : secondary_instances_) {
229     fn(kv.second.get());
230   }
231 }
232 
233 void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
234                                      const std::string& instance_name) {
235   if (buffer_watermark_support_ == PollSupport::kUntested) {
236     buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
237   }
238 
239   // If the instance is already active, then at most we need to update the buffer
240   // poll callbacks. The periodic |ReadTick| will pick up any updates to the
241   // period the next time it executes.
242   if (instance->started_data_sources.size() > 1) {
243     UpdateBufferWatermarkWatches(instance, instance_name);
244     return;
245   }
246 
247   // Lazily allocate the memory used for reading & parsing ftrace. In the case
248   // of multiple ftrace instances, this might already be valid.
249   parsing_mem_.AllocateIfNeeded();
250 
251   const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
252   size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
253   PERFETTO_CHECK(instance->cpu_readers.empty());
254   instance->cpu_readers.reserve(num_cpus);
255   for (size_t cpu = 0; cpu < num_cpus; cpu++) {
256     instance->cpu_readers.emplace_back(
257         cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
258         instance->table.get(), &symbolizer_, ftrace_clock,
259         &ftrace_clock_snapshot_);
260   }
261 
262   // Special case for primary instance: if not using the boot clock, take
263   // manual clock snapshots so that the trace parser can do a best-effort
264   // conversion back to boot. This is primarily for old kernels that predate
265   // boot clock support and therefore default to the "global" clock.
266   if (instance == &primary_ &&
267       ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
268     cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
269     MaybeSnapshotFtraceClock();
270   }
271 
272   // Set up poll callbacks for the buffers if requested by at least one DS.
273   UpdateBufferWatermarkWatches(instance, instance_name);
274 
275   // Start a new repeating read task (even if there is already one posted due
276   // to a different ftrace instance). Any old tasks will stop due to generation
277   // checks.
278   auto generation = ++tick_generation_;
279   auto tick_period_ms = GetTickPeriodMs();
280   auto weak_this = weak_factory_.GetWeakPtr();
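  // The delay below phase-aligns ticks to wall-clock multiples of the period:
  // e.g. with a 100 ms period and NowMs() == 1730, the first tick is scheduled
  // ~70 ms out (at the 1800 ms boundary), which also keeps tasks started at
  // different times roughly in phase.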
281   task_runner_->PostDelayedTask(
282       [weak_this, generation] {
283         if (weak_this)
284           weak_this->ReadTick(generation);
285       },
286       tick_period_ms - (NowMs() % tick_period_ms));
287 }
288 
289 // We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
290 // we iterate over all per-cpu buffers, parse their contents, and then write out
291 // the serialized packets. This is handled by |CpuReader| instances, which
292 // attempt to read from their respective per-cpu buffer fd until they catch up
293 // to the head of the buffer, or hit a transient error.
294 //
295 // The readers work in batches of |kParsingBufferSizePages| pages for cache
296 // locality, and to limit memory usage.
297 //
298 // However, the reading happens on the primary thread, shared with the rest of
299 // the service (including ipc). If there is a lot of ftrace data to read, we
300 // want to yield to the event loop, re-enqueueing a continuation task at the end
301 // of the immediate queue (letting other enqueued tasks run before
302 // continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
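// |tick_generation_| is bumped by StartIfNeeded() whenever a new repeating read
// task is posted; older tasks notice the mismatch below and stop, which is what
// the "generation checks" mentioned in StartIfNeeded() refer to.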
303 void FtraceController::ReadTick(int generation) {
304   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
305                              metatrace::FTRACE_READ_TICK);
306   if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
307     return;
308   }
309   MaybeSnapshotFtraceClock();
310 
311   // Read all per-cpu buffers.
312   bool all_cpus_done = true;
313   ForEachInstance([&](FtraceInstanceState* instance) {
314     all_cpus_done &= ReadPassForInstance(instance);
315   });
316   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
317 
318   auto weak_this = weak_factory_.GetWeakPtr();
319   if (!all_cpus_done) {
320     PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
321     task_runner_->PostTask([weak_this, generation] {
322       if (weak_this)
323         weak_this->ReadTick(generation);
324     });
325   } else {
326     // Done until next period.
327     auto tick_period_ms = GetTickPeriodMs();
328     task_runner_->PostDelayedTask(
329         [weak_this, generation] {
330           if (weak_this)
331             weak_this->ReadTick(generation);
332         },
333         tick_period_ms - (NowMs() % tick_period_ms));
334   }
335 
336 #if PERFETTO_DCHECK_IS_ON()
337   // OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
338   // all metadata, including the |kernel_addrs| map for symbolization.
339   ForEachInstance([&](FtraceInstanceState* instance) {
340     for (FtraceDataSource* ds : instance->started_data_sources) {
341       FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
342       PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
343       PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
344     }
345   });
346 #endif
347 }
348 
349 bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
350   if (instance->started_data_sources.empty())
351     return true;
352 
353   bool all_cpus_done = true;
354   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
355     size_t max_pages = kMaxPagesPerCpuPerReadTick;
356     size_t pages_read = instance->cpu_readers[i].ReadCycle(
357         &parsing_mem_, max_pages, instance->started_data_sources);
358     PERFETTO_DCHECK(pages_read <= max_pages);
359     if (pages_read == max_pages) {
360       all_cpus_done = false;
361     }
362   }
363   return all_cpus_done;
364 }
365 
366 uint32_t FtraceController::GetTickPeriodMs() {
367   if (data_sources_.empty())
368     return kDefaultTickPeriodMs;
369   uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
370   uint32_t min_period_ms = kUnsetPeriod;
371   bool using_poll = true;
372   ForEachInstance([&](FtraceInstanceState* instance) {
373     using_poll &= instance->buffer_watches_posted;
374     for (FtraceDataSource* ds : instance->started_data_sources) {
375       if (ds->config().has_drain_period_ms()) {
376         min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
377       }
378     }
379   });
380 
381   // None of the active data sources requested an explicit tick period.
382   // The historical default is 100ms, but if we know that all instances are also
383   // using buffer watermark polling, we can raise it. We don't disable the tick
384   // entirely as it spreads the read work more evenly, and ensures procfs
385   // scrapes of seen TIDs are not too stale.
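  // Illustrative outcomes: two started data sources asking for drain_period_ms
  // of 250 and 500 give a 250 ms tick; no explicit period anywhere, with
  // watermark watches posted on every instance, gives 1000 ms; otherwise the
  // 100 ms default applies.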
386   if (min_period_ms == kUnsetPeriod) {
387     return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
388   }
389 
390   if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
391     PERFETTO_LOG(
392         "drain_period_ms was %u should be between %u and %u. "
393         "Falling back onto a default.",
394         min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
395     return kDefaultTickPeriodMs;
396   }
397   return min_period_ms;
398 }
399 
400 void FtraceController::UpdateBufferWatermarkWatches(
401     FtraceInstanceState* instance,
402     const std::string& instance_name) {
403   PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
404   if (buffer_watermark_support_ == PollSupport::kUnsupported)
405     return;
406 
407   bool requested_poll = false;
408   for (const FtraceDataSource* ds : instance->started_data_sources) {
409     requested_poll |= ds->config().has_drain_buffer_percent();
410   }
411 
412   if (!requested_poll || instance->buffer_watches_posted)
413     return;
414 
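  // Hedged note: the config's drain_buffer_percent is assumed to be applied to
  // the tracefs buffer_percent file by the muxer, after which the kernel only
  // signals POLLIN on trace_pipe_raw once a per-cpu buffer is at least that
  // full, so these fd watches act as buffer-watermark callbacks.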
415   auto weak_this = weak_factory_.GetWeakPtr();
416   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
417     int fd = instance->cpu_readers[i].RawBufferFd();
418     task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
419       if (weak_this)
420         weak_this->OnBufferPastWatermark(instance_name, i,
421                                          /*repoll_watermark=*/true);
422     });
423   }
424   instance->buffer_watches_posted = true;
425 }
426 
427 void FtraceController::RemoveBufferWatermarkWatches(
428     FtraceInstanceState* instance) {
429   if (!instance->buffer_watches_posted)
430     return;
431 
432   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
433     int fd = instance->cpu_readers[i].RawBufferFd();
434     task_runner_->RemoveFileDescriptorWatch(fd);
435   }
436   instance->buffer_watches_posted = false;
437 }
438 
439 // TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
440 // a continuation. It's a tradeoff between procfs scrape freshness and urgency
441 // to drain ftrace kernel buffers.
442 void FtraceController::OnBufferPastWatermark(std::string instance_name,
443                                              size_t cpu,
444                                              bool repoll_watermark) {
445   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
446                              metatrace::FTRACE_CPU_BUFFER_WATERMARK);
447 
448   // Instance might have been stopped before this callback runs.
449   FtraceInstanceState* instance = GetInstance(instance_name);
450   if (!instance || cpu >= instance->cpu_readers.size())
451     return;
452 
453   // Repoll all per-cpu buffers with zero timeout to confirm that at least
454   // one is still past the watermark. This might not be true if a different
455   // callback / readtick / flush did a read pass before this callback reached
456   // the front of the task runner queue.
457   if (repoll_watermark) {
458     size_t num_cpus = instance->cpu_readers.size();
459     std::vector<struct pollfd> pollfds(num_cpus);
460     for (size_t i = 0; i < num_cpus; i++) {
461       pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
462       pollfds[i].events = POLLIN;
463     }
464     int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
465     if (r < 0) {
466       PERFETTO_DPLOG("poll failed");
467       return;
468     } else if (r == 0) {  // no buffers below the watermark -> we're done.
469       return;
470     }
471   // Check for at least one readable fd, as some poll results might be POLLERR,
472     // as seen in cases with offlined cores. It's still fine to attempt reading
473     // from those buffers as CpuReader will handle the ENODEV.
474     bool has_readable_fd = false;
475     for (size_t i = 0; i < num_cpus; i++) {
476       has_readable_fd |= (pollfds[i].revents & POLLIN);
477     }
478     if (!has_readable_fd) {
479       return;
480     }
481   }
482 
483   MaybeSnapshotFtraceClock();
484   bool all_cpus_done = ReadPassForInstance(instance);
485   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
486   if (!all_cpus_done) {
487     // More data to be read, but we want to let other task_runner tasks run.
488     // Repost a continuation task.
489     auto weak_this = weak_factory_.GetWeakPtr();
490     task_runner_->PostTask([weak_this, instance_name, cpu] {
491       if (weak_this)
492         weak_this->OnBufferPastWatermark(instance_name, cpu,
493                                          /*repoll_watermark=*/false);
494     });
495   }
496 }
497 
498 void FtraceController::Flush(FlushRequestID flush_id) {
499   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
500                              metatrace::FTRACE_CPU_FLUSH);
501 
502   ForEachInstance([&](FtraceInstanceState* instance) {  // for clang-format
503     FlushForInstance(instance);
504   });
505   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
506 
507   ForEachInstance([&](FtraceInstanceState* instance) {
508     for (FtraceDataSource* ds : instance->started_data_sources) {
509       ds->OnFtraceFlushComplete(flush_id);
510     }
511   });
512 }
513 
514 void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
515   if (instance->started_data_sources.empty())
516     return;
517 
518   // Read all cpus in one go, limiting the per-cpu read amount to make sure we
519   // don't get stuck chasing the writer if there's a very high bandwidth of
520   // events.
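  // Note that the cap below equals the configured per-cpu buffer size, so a
  // flush reads at most one full buffer's worth of pages from each cpu.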
521   size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
522   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
523     instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
524                                        instance->started_data_sources);
525   }
526 }
527 
528 // We are not implicitly flushing on Stop. The tracing service is supposed to
529 // ask for an explicit flush before stopping, unless it needs to perform a
530 // non-graceful stop.
531 void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
532   if (!instance->started_data_sources.empty())
533     return;
534 
535   RemoveBufferWatermarkWatches(instance);
536   instance->cpu_readers.clear();
537   if (instance == &primary_) {
538     cpu_zero_stats_fd_.reset();
539   }
540   // Muxer cannot change the current_tracer until we close the trace pipe fds
541   // (i.e. per_cpu). Hence an explicit request here.
542   instance->ftrace_config_muxer->ResetCurrentTracer();
543 
544   DestroyIfUnusedSeconaryInstance(instance);
545 
546   // Clean up global state if done with all data sources.
547   if (!data_sources_.empty())
548     return;
549 
550   if (!retain_ksyms_on_stop_) {
551     symbolizer_.Destroy();
552   }
553   retain_ksyms_on_stop_ = false;
554 
555   // Note: might have never been allocated if data sources were rejected.
556   parsing_mem_.Release();
557 }
558 
559 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
560   if (!ValidConfig(data_source->config()))
561     return false;
562 
563   FtraceInstanceState* instance =
564       GetOrCreateInstance(data_source->config().instance_name());
565   if (!instance)
566     return false;
567 
568   // Note: from this point onwards, we must not leak the possibly created
569   // (secondary) instance if returning early.
570 
571   FtraceConfigId config_id = next_cfg_id_++;
572   if (!instance->ftrace_config_muxer->SetupConfig(
573           config_id, data_source->config(),
574           data_source->mutable_setup_errors())) {
575     DestroyIfUnusedSeconaryInstance(instance);
576     return false;
577   }
578 
579   const FtraceDataSourceConfig* ds_config =
580       instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
581   auto it_and_inserted = data_sources_.insert(data_source);
582   PERFETTO_DCHECK(it_and_inserted.second);
583   data_source->Initialize(config_id, ds_config);
584   return true;
585 }
586 
587 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
588   PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
589 
590   FtraceConfigId config_id = data_source->config_id();
591   PERFETTO_CHECK(config_id);
592   const std::string& instance_name = data_source->config().instance_name();
593   FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
594   PERFETTO_CHECK(instance);
595 
596   if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
597     return false;
598   instance->started_data_sources.insert(data_source);
599   StartIfNeeded(instance, instance_name);
600 
601   // Parse kernel symbols if required by the config. This can be an expensive
602   // operation (cpu-bound for 500ms+), so delay the StartDataSource
603   // acknowledgement until after we're done. This lets a consumer wait for the
604   // expensive work to be done by waiting on the "all data sources started"
605   // fence. This helps isolate the effects of the cpu-bound work on
606   // frequency scaling of cpus when recording benchmarks (b/236143653).
607   // Note that we're already recording data into the kernel ftrace
608   // buffers while doing the symbol parsing.
609   if (data_source->config().symbolize_ksyms()) {
610     symbolizer_.GetOrCreateKernelSymbolMap();
611     // If at least one config sets the KSYMS_RETAIN flag, keep the ksym map
612     // around in StopIfNeeded().
613     const auto KRET = FtraceConfig::KSYMS_RETAIN;
614     retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
615   }
616 
617   return true;
618 }
619 
620 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
621   size_t removed = data_sources_.erase(data_source);
622   if (!removed)
623     return;  // can happen if AddDataSource failed
624 
625   FtraceInstanceState* instance =
626       GetOrCreateInstance(data_source->config().instance_name());
627   PERFETTO_CHECK(instance);
628 
629   instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
630   instance->started_data_sources.erase(data_source);
631   StopIfNeeded(instance);
632 }
633 
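// Parses the kprobe profiling stats returned by FtraceProcfs::ReadKprobeStats()
// (assumed here to be the tracefs kprobe_profile format, one
// "<event name> <hits> <miss-hits>" line per probe) and accumulates the totals.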
634 bool DumpKprobeStats(const std::string& text, FtraceStats* ftrace_stats) {
635   int64_t hits = 0;
636   int64_t misses = 0;
637 
638   base::StringSplitter line(std::move(text), '\n');
639   while (line.Next()) {
640     base::StringSplitter tok(line.cur_token(), line.cur_token_size() + 1, ' ');
641 
642     if (!tok.Next())
643       return false;
644     // Skip the event name field
645 
646     if (!tok.Next())
647       return false;
648     hits += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));
649 
650     if (!tok.Next())
651       return false;
652     misses += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));
653   }
654 
655   ftrace_stats->kprobe_stats.hits = hits;
656   ftrace_stats->kprobe_stats.misses = misses;
657 
658   return true;
659 }
660 
661 void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
662                                        FtraceStats* stats_out) {
663   FtraceInstanceState* instance =
664       GetInstance(data_source->config().instance_name());
665   PERFETTO_DCHECK(instance);
666   if (!instance)
667     return;
668 
669   DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
670   if (symbolizer_.is_valid()) {
671     auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
672     stats_out->kernel_symbols_parsed =
673         static_cast<uint32_t>(symbol_map->num_syms());
674     stats_out->kernel_symbols_mem_kb =
675         static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
676   }
677 
678   if (data_source->parsing_config()->kprobes.size() > 0) {
679     DumpKprobeStats(instance->ftrace_procfs.get()->ReadKprobeStats(),
680                     stats_out);
681   }
682 }
683 
684 void FtraceController::MaybeSnapshotFtraceClock() {
685   if (!cpu_zero_stats_fd_)
686     return;
687 
688   auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
689   PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
690 
691   // Snapshot the boot clock *before* reading CPU stats so that
692   // the two clocks are as close together as possible (if it were the other
693   // way round, we'd skew by the cost of string parsing).
694   ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();
695 
696   // A value of zero will cause this snapshot to be skipped.
697   ftrace_clock_snapshot_.ftrace_clock_ts =
698       ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
699 }
700 
701 FtraceController::PollSupport
702 FtraceController::VerifyKernelSupportForBufferWatermark() {
703   struct utsname uts = {};
704   if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
705     return PollSupport::kUnsupported;
706   if (!PollSupportedOnKernelVersion(uts.release))
707     return PollSupport::kUnsupported;
708 
709   // Check that buffer_percent exists and is writable.
710   auto* tracefs = primary_.ftrace_procfs.get();
711   uint32_t current = tracefs->ReadBufferPercent();
712   if (!tracefs->SetBufferPercent(current ? current : 50)) {
713     return PollSupport::kUnsupported;
714   }
715 
716   // Check that polling on per_cpu/cpu0/trace_pipe_raw doesn't return errors.
717   base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
718   struct pollfd pollset = {};
719   pollset.fd = fd.get();
720   pollset.events = POLLIN;
721   int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
722   if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
723     return PollSupport::kUnsupported;
724   }
725   return PollSupport::kSupported;
726 }
727 
728 // Check kernel version since the poll implementation has historical bugs.
729 // We're looking for at least 6.9 for the following:
730 //   ffe3986fece6 ring-buffer: Only update pages_touched when a new page...
731 // static
732 bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
733   int major = 0, minor = 0;
734   if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
735     return false;
736   }
737   if (major < kPollRequiredMajorVersion ||
738       (major == kPollRequiredMajorVersion &&
739        minor < kPollRequiredMinorVersion)) {
740     // Android: opportunistically detect a few select GKI kernels that are known
741     // to have the fixes.
742     std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
743     if (!gki.has_value())
744       return false;
745     // android14-6.1.86 or higher sublevel:
746     //   2d5f12de4cf5 ring-buffer: Only update pages_touched when a new page...
747     // android15-6.6.27 or higher sublevel:
748     //   a9cd92bc051f ring-buffer: Only update pages_touched when a new page...
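    // Illustrative checks: "6.1.90-android14-11" passes (sub_level 90 >= 86),
    // while "6.6.20-android15-8" does not (sub_level 20 < 27).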
749     bool gki_patched = (gki->release == 14 && gki->version == 6 &&
750                         gki->patch_level == 1 && gki->sub_level >= 86) ||
751                        (gki->release == 15 && gki->version == 6 &&
752                         gki->patch_level == 6 && gki->sub_level >= 27);
753     return gki_patched;
754   }
755   return true;
756 }
757 
758 size_t FtraceController::GetStartedDataSourcesCount() {
759   size_t cnt = 0;
760   ForEachInstance([&](FtraceInstanceState* instance) {
761     cnt += instance->started_data_sources.size();
762   });
763   return cnt;
764 }
765 
766 FtraceController::FtraceInstanceState::FtraceInstanceState(
767     std::unique_ptr<FtraceProcfs> ft,
768     std::unique_ptr<ProtoTranslationTable> ptt,
769     std::unique_ptr<FtraceConfigMuxer> fcm)
770     : ftrace_procfs(std::move(ft)),
771       table(std::move(ptt)),
772       ftrace_config_muxer(std::move(fcm)) {}
773 
774 FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
775     const std::string& instance_name) {
776   FtraceInstanceState* maybe_existing = GetInstance(instance_name);
777   if (maybe_existing)
778     return maybe_existing;
779 
780   PERFETTO_DCHECK(!instance_name.empty());
781   std::unique_ptr<FtraceInstanceState> instance =
782       CreateSecondaryInstance(instance_name);
783   if (!instance)
784     return nullptr;
785 
786   auto it_and_inserted = secondary_instances_.emplace(
787       std::piecewise_construct, std::forward_as_tuple(instance_name),
788       std::forward_as_tuple(std::move(instance)));
789   PERFETTO_CHECK(it_and_inserted.second);
790   return it_and_inserted.first->second.get();
791 }
792 
793 FtraceController::FtraceInstanceState* FtraceController::GetInstance(
794     const std::string& instance_name) {
795   if (instance_name.empty())
796     return &primary_;
797 
798   auto it = secondary_instances_.find(instance_name);
799   return it != secondary_instances_.end() ? it->second.get() : nullptr;
800 }
801 
802 void FtraceController::DestroyIfUnusedSeconaryInstance(
803     FtraceInstanceState* instance) {
804   if (instance == &primary_)
805     return;
806   for (auto it = secondary_instances_.begin(); it != secondary_instances_.end();
807        ++it) {
808     if (it->second.get() == instance &&
809         instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
810       // No data sources are left referencing this secondary instance.
811       secondary_instances_.erase(it);
812       return;
813     }
814   }
815   PERFETTO_FATAL("Bug in ftrace instance lifetimes");
816 }
817 
818 std::unique_ptr<FtraceController::FtraceInstanceState>
819 FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
820   std::optional<std::string> instance_path = AbsolutePathForInstance(
821       primary_.ftrace_procfs->GetRootPath(), instance_name);
822   if (!instance_path.has_value()) {
823     PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
824                   instance_name.c_str());
825     return nullptr;
826   }
827 
828   auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
829   if (!ftrace_procfs) {
830     PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
831                   instance_path->c_str());
832     return nullptr;
833   }
834 
835   auto table = ProtoTranslationTable::Create(
836       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
837   if (!table) {
838     PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
839                   instance_path->c_str());
840     return nullptr;
841   }
842 
843   // Secondary instances don't support atrace or the vendor tracepoint HAL.
844   std::map<std::string, std::vector<GroupAndName>> vendor_evts;
845 
846   auto syscalls = SyscallTable::FromCurrentArch();
847 
848   auto muxer = std::make_unique<FtraceConfigMuxer>(
849       ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
850       std::move(syscalls), vendor_evts,
851       /* secondary_instance= */ true);
852   return std::make_unique<FtraceInstanceState>(
853       std::move(ftrace_procfs), std::move(table), std::move(muxer));
854 }
855 
856 // TODO(rsavitski): we want to eventually add support for the default
857 // (primary_) tracefs path to be an instance itself, at which point we'll need
858 // to be careful to distinguish the tracefs mount point from the default
859 // instance path.
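// Example (illustrative): with a tracefs root of "/sys/kernel/tracing/" and a
// config instance name of "foo", the result is
// "/sys/kernel/tracing/instances/foo/"; names containing '/' or starting with
// ".." are rejected.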
860 // static
861 std::optional<std::string> FtraceController::AbsolutePathForInstance(
862     const std::string& tracefs_root,
863     const std::string& raw_cfg_name) {
864   if (base::Contains(raw_cfg_name, '/') ||
865       base::StartsWith(raw_cfg_name, "..")) {
866     return std::nullopt;
867   }
868 
869   // ARM64 pKVM hypervisor tracing emulates an instance, but is not under
870   // instances/, so we special-case that name for now.
871   if (raw_cfg_name == "hyp") {
872     std::string hyp_path = tracefs_root + "hyp/";
873     PERFETTO_LOG(
874         "Config specified reserved \"hyp\" instance name, using %s for events.",
875         hyp_path.c_str());
876     return std::make_optional(hyp_path);
877   }
878 
879   return tracefs_root + "instances/" + raw_cfg_name + "/";
880 }
881 
882 FtraceController::Observer::~Observer() = default;
883 
884 }  // namespace perfetto
885