xref: /aosp_15_r20/external/perfetto/src/traced/probes/ftrace/cpu_reader.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
19 
20 #include <string.h>
21 #include <cstdint>
22 
23 #include <optional>
24 #include <set>
25 
26 #include "perfetto/ext/base/paged_memory.h"
27 #include "perfetto/ext/base/scoped_file.h"
28 #include "perfetto/ext/base/utils.h"
29 #include "perfetto/ext/traced/data_source_types.h"
30 #include "perfetto/ext/tracing/core/trace_writer.h"
31 #include "perfetto/protozero/message.h"
32 #include "perfetto/protozero/message_handle.h"
33 #include "src/traced/probes/ftrace/compact_sched.h"
34 #include "src/traced/probes/ftrace/ftrace_metadata.h"
35 
36 #include "protos/perfetto/trace/trace_packet.pbzero.h"
37 
38 namespace perfetto {
39 
// Forward declarations. This header only refers to these types through
// pointers, so their full definitions are not required here.
class FtraceDataSource;
class LazyKernelSymbolizer;
class ProtoTranslationTable;
struct FtraceClockSnapshot;
struct FtraceDataSourceConfig;

namespace protos {
namespace pbzero {
class FtraceEventBundle;
// Opaque enum declarations: the fixed underlying type allows these enums to be
// passed and returned by value without their enumerators being visible here.
enum FtraceClock : int32_t;
enum FtraceParseStatus : int32_t;
}  // namespace pbzero
}  // namespace protos
53 
54 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto
55 // tracing buffers.
56 class CpuReader {
57  public:
58   // Buffers used when parsing a chunk of ftrace data, allocated by
59   // FtraceController and repeatedly reused by all CpuReaders:
60   // * paged memory into which we read raw ftrace data.
61   // * buffers to accumulate and emit scheduling data in a structure-of-arrays
62   //   format (packed proto fields).
63   class ParsingBuffers {
64    public:
AllocateIfNeeded()65     void AllocateIfNeeded() {
66       // PagedMemory stays valid as long as it was allocated once.
67       if (!ftrace_data_.IsValid()) {
68         ftrace_data_ = base::PagedMemory::Allocate(base::GetSysPageSize() *
69                                                    kFtraceDataBufSizePages);
70       }
71       // Heap-allocated buffer gets freed and reallocated.
72       if (!compact_sched_) {
73         compact_sched_ = std::make_unique<CompactSchedBuffer>();
74       }
75     }
76 
Release()77     void Release() {
78       if (ftrace_data_.IsValid()) {
79         ftrace_data_.AdviseDontNeed(ftrace_data_.Get(), ftrace_data_.size());
80       }
81       compact_sched_.reset();
82     }
83 
84    private:
85     friend class CpuReader;
86     // When reading and parsing data for a particular cpu, we do it in batches
87     // of this many pages. In other words, we'll read up to
88     // |kFtraceDataBufSizePages| into memory, parse them, and then repeat if we
89     // still haven't caught up to the writer.
90     static constexpr size_t kFtraceDataBufSizePages = 32;
91 
ftrace_data_buf()92     uint8_t* ftrace_data_buf() const {
93       return reinterpret_cast<uint8_t*>(ftrace_data_.Get());
94     }
ftrace_data_buf_pages()95     size_t ftrace_data_buf_pages() const {
96       PERFETTO_DCHECK(ftrace_data_.size() ==
97                       base::GetSysPageSize() * kFtraceDataBufSizePages);
98       return kFtraceDataBufSizePages;
99     }
compact_sched_buf()100     CompactSchedBuffer* compact_sched_buf() const {
101       return compact_sched_.get();
102     }
103 
104     base::PagedMemory ftrace_data_;
105     std::unique_ptr<CompactSchedBuffer> compact_sched_;
106   };
107 
108   // Facilitates lazy proto writing - not every event in the kernel ring buffer
109   // is serialised in the trace, so this class allows for trace packets to be
110   // written only if there's at least one relevant event in the ring buffer
111   // batch. Public for testing.
112   class Bundler {
113    public:
Bundler(TraceWriter * trace_writer,FtraceMetadata * metadata,LazyKernelSymbolizer * symbolizer,size_t cpu,const FtraceClockSnapshot * ftrace_clock_snapshot,protos::pbzero::FtraceClock ftrace_clock,CompactSchedBuffer * compact_sched_buf,bool compact_sched_enabled,uint64_t previous_bundle_end_ts)114     Bundler(TraceWriter* trace_writer,
115             FtraceMetadata* metadata,
116             LazyKernelSymbolizer* symbolizer,
117             size_t cpu,
118             const FtraceClockSnapshot* ftrace_clock_snapshot,
119             protos::pbzero::FtraceClock ftrace_clock,
120             CompactSchedBuffer* compact_sched_buf,
121             bool compact_sched_enabled,
122             uint64_t previous_bundle_end_ts)
123         : trace_writer_(trace_writer),
124           metadata_(metadata),
125           symbolizer_(symbolizer),
126           cpu_(cpu),
127           ftrace_clock_snapshot_(ftrace_clock_snapshot),
128           ftrace_clock_(ftrace_clock),
129           compact_sched_enabled_(compact_sched_enabled),
130           compact_sched_buf_(compact_sched_buf),
131           initial_previous_bundle_end_ts_(previous_bundle_end_ts) {
132       if (compact_sched_enabled_)
133         compact_sched_buf_->Reset();
134     }
135 
~Bundler()136     ~Bundler() { FinalizeAndRunSymbolizer(); }
137 
GetOrCreateBundle()138     protos::pbzero::FtraceEventBundle* GetOrCreateBundle() {
139       if (!bundle_) {
140         StartNewPacket(false, initial_previous_bundle_end_ts_);
141       }
142       return bundle_;
143     }
144 
145     // Forces the creation of a new TracePacket.
146     void StartNewPacket(bool lost_events,
147                         uint64_t previous_bundle_end_timestamp);
148 
149     // This function is called after the contents of a FtraceBundle are written.
150     void FinalizeAndRunSymbolizer();
151 
compact_sched_buf()152     CompactSchedBuffer* compact_sched_buf() {
153       // FinalizeAndRunSymbolizer will only process the compact_sched_buf_ if
154       // there is an open bundle.
155       GetOrCreateBundle();
156       return compact_sched_buf_;
157     }
158 
159    private:
160     TraceWriter* const trace_writer_;         // Never nullptr.
161     FtraceMetadata* const metadata_;          // Never nullptr.
162     LazyKernelSymbolizer* const symbolizer_;  // Can be nullptr.
163     const size_t cpu_;
164     const FtraceClockSnapshot* const ftrace_clock_snapshot_;
165     protos::pbzero::FtraceClock const ftrace_clock_;
166     const bool compact_sched_enabled_;
167     CompactSchedBuffer* const compact_sched_buf_;
168     uint64_t initial_previous_bundle_end_ts_;
169 
170     TraceWriter::TracePacketHandle packet_;
171     protos::pbzero::FtraceEventBundle* bundle_ = nullptr;
172   };
173 
  // Parsed representation of a raw ftrace page's header, as returned by
  // ParsePageHeader().
  struct PageHeader {
    // NOTE(review): presumably the timestamp recorded in the page header —
    // confirm against ParsePageHeader() in cpu_reader.cc.
    uint64_t timestamp;
    // Callers of ParsePagePayload() must validate that this stays within the
    // current page.
    uint64_t size;
    // NOTE(review): presumably set when the header reports lost events —
    // confirm against ParsePageHeader() in cpu_reader.cc.
    bool lost_events;
  };
179 
  // Creates a reader for |cpu|. |trace_fd| is passed by value as a
  // base::ScopedFile.
  // NOTE(review): the pointer arguments look non-owning (the class keeps plain
  // pointers as members) — confirm the expected lifetimes with the caller.
  CpuReader(size_t cpu,
            base::ScopedFile trace_fd,
            const ProtoTranslationTable* table,
            LazyKernelSymbolizer* symbolizer,
            protos::pbzero::FtraceClock ftrace_clock,
            const FtraceClockSnapshot* ftrace_clock_snapshot);
  ~CpuReader();

  // move-only: copying is deleted, moving uses the defaulted member-wise
  // operations.
  CpuReader(const CpuReader&) = delete;
  CpuReader& operator=(const CpuReader&) = delete;
  CpuReader(CpuReader&&) = default;
  CpuReader& operator=(CpuReader&&) = default;
193 
  // Reads and parses all ftrace data for this cpu (in batches), until we catch
  // up to the writer, or hit |max_pages|. Returns number of pages read.
  // |parsing_bufs| supplies the reusable buffers described on ParsingBuffers.
  // NOTE(review): presumably the parsed data is written to every data source
  // in |started_data_sources| — confirm in cpu_reader.cc.
  size_t ReadCycle(ParsingBuffers* parsing_bufs,
                   size_t max_pages,
                   const std::set<FtraceDataSource*>& started_data_sources);
199 
200   template <typename T>
ReadAndAdvance(const uint8_t ** ptr,const uint8_t * end,T * out)201   static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) {
202     if (*ptr > end - sizeof(T))
203       return false;
204     memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr),
205            sizeof(T));
206     *ptr += sizeof(T);
207     return true;
208   }
209 
210   // Caller must do the bounds check:
211   // [start + offset, start + offset + sizeof(T))
212   // Returns the raw value not the varint.
213   template <typename T>
ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)214   static T ReadIntoVarInt(const uint8_t* start,
215                           uint32_t field_id,
216                           protozero::Message* out) {
217     T t;
218     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
219     out->AppendVarInt<T>(field_id, t);
220     return t;
221   }
222 
223   template <typename T>
ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)224   static void ReadInode(const uint8_t* start,
225                         uint32_t field_id,
226                         protozero::Message* out,
227                         FtraceMetadata* metadata) {
228     T t = ReadIntoVarInt<T>(start, field_id, out);
229     metadata->AddInode(static_cast<Inode>(t));
230   }
231 
232   template <typename T>
ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)233   static void ReadDevId(const uint8_t* start,
234                         uint32_t field_id,
235                         protozero::Message* out,
236                         FtraceMetadata* metadata) {
237     T t;
238     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
239     BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t);
240     out->AppendVarInt<BlockDeviceID>(field_id, dev_id);
241     metadata->AddDevice(dev_id);
242   }
243 
244   template <typename T>
ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)245   static void ReadSymbolAddr(const uint8_t* start,
246                              uint32_t field_id,
247                              protozero::Message* out,
248                              FtraceMetadata* metadata) {
249     // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout
250     // via traces, we put in the trace only a mangled address (which really is
251     // the insertion order into metadata.kernel_addrs). We don't care about the
252     // actual symbol addesses. We just need to match that against the symbol
253     // name in the names in the FtraceEventBundle.KernelSymbols.
254     T full_addr;
255     memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T));
256     uint32_t interned_index = metadata->AddSymbolAddr(full_addr);
257     out->AppendVarInt(field_id, interned_index);
258   }
259 
ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)260   static void ReadPid(const uint8_t* start,
261                       uint32_t field_id,
262                       protozero::Message* out,
263                       FtraceMetadata* metadata) {
264     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
265     metadata->AddPid(pid);
266   }
267 
ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)268   static void ReadCommonPid(const uint8_t* start,
269                             uint32_t field_id,
270                             protozero::Message* out,
271                             FtraceMetadata* metadata) {
272     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
273     metadata->AddCommonPid(pid);
274   }
275 
276   // Internally the kernel stores device ids in a different layout to that
277   // exposed to userspace via stat etc. There's no userspace function to convert
278   // between the formats so we have to do it ourselves.
279   template <typename T>
TranslateBlockDeviceIDToUserspace(T kernel_dev)280   static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) {
281     // Provided search index s_dev from
282     // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404
283     // Convert to user space id using
284     // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10
285     // TODO(azappone): see if this is the same on all platforms
286     uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20;
287     uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1);
288     return static_cast<BlockDeviceID>(  // From makedev()
289         ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) |
290         ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL)));
291   }
292 
  // Returns a parsed representation of the given raw ftrace page's header.
  // |ptr| is an in/out cursor into the page.
  // NOTE(review): presumably |*ptr| is advanced past the header on success and
  // std::nullopt signals a header that could not be parsed; likewise
  // |page_header_size_len| looks like the byte width of the header's size
  // field — confirm both in cpu_reader.cc.
  static std::optional<CpuReader::PageHeader> ParsePageHeader(
      const uint8_t** ptr,
      uint16_t page_header_size_len);
297 
  // Parse the payload of a raw ftrace page, and write the events as protos
  // into the provided bundle (and/or compact buffer).
  // |table| contains the mix of compile time (e.g. proto field ids) and
  // run time (e.g. field offset and size) information necessary to do this.
  // The table is initialized once at start time by the ftrace controller
  // which passes it to the CpuReader which passes it here.
  // The caller is responsible for validating that the page_header->size stays
  // within the current page.
  // The outcome is reported through the returned FtraceParseStatus.
  // NOTE(review): |bundle_end_timestamp| is an out-pointer; presumably it
  // receives the timestamp of the last event parsed — confirm in
  // cpu_reader.cc.
  static protos::pbzero::FtraceParseStatus ParsePagePayload(
      const uint8_t* start_of_payload,
      const PageHeader* page_header,
      const ProtoTranslationTable* table,
      const FtraceDataSourceConfig* ds_config,
      Bundler* bundler,
      FtraceMetadata* metadata,
      uint64_t* bundle_end_timestamp);
314 
  // Parse a single raw ftrace event beginning at |start| and ending at |end|
  // and write it into the provided bundle as a proto.
  // |table| contains the mix of compile time (e.g. proto field ids) and
  // run time (e.g. field offset and size) information necessary to do this.
  // The table is initialized once at start time by the ftrace controller
  // which passes it to the CpuReader which passes it to ParsePagePayload
  // which passes it here.
  // NOTE(review): the bool result presumably reports whether the event was
  // parsed successfully — confirm in cpu_reader.cc.
  static bool ParseEvent(uint16_t ftrace_event_id,
                         const uint8_t* start,
                         const uint8_t* end,
                         const ProtoTranslationTable* table,
                         const FtraceDataSourceConfig* ds_config,
                         protozero::Message* message,
                         FtraceMetadata* metadata);
329 
  // NOTE(review): presumably parses the single |field| of the raw event
  // spanning [start, end) and writes it into |message|, returning false on
  // failure — confirm in cpu_reader.cc.
  static bool ParseField(const Field& field,
                         const uint8_t* start,
                         const uint8_t* end,
                         const ProtoTranslationTable* table,
                         protozero::Message* message,
                         FtraceMetadata* metadata);
336 
  // Parse a sys_enter event according to the pre-validated expected format.
  // NOTE(review): the raw event presumably spans [start, end) and the result
  // is written into |message| — confirm in cpu_reader.cc.
  static bool ParseSysEnter(const Event& info,
                            const uint8_t* start,
                            const uint8_t* end,
                            protozero::Message* message,
                            FtraceMetadata* metadata);

  // Parse a sys_exit event according to the pre-validated expected format.
  // Unlike ParseSysEnter, this variant also receives the data source config.
  static bool ParseSysExit(const Event& info,
                           const uint8_t* start,
                           const uint8_t* end,
                           const FtraceDataSourceConfig* ds_config,
                           protozero::Message* message,
                           FtraceMetadata* metadata);
351 
  // Parse a sched_switch event according to pre-validated format, and buffer
  // the individual fields in the given compact encoding batch.
  // Takes no |end| pointer, so no bounds can be checked against the event's
  // extent here.
  static void ParseSchedSwitchCompact(const uint8_t* start,
                                      uint64_t timestamp,
                                      const CompactSchedSwitchFormat* format,
                                      CompactSchedBuffer* compact_buf,
                                      FtraceMetadata* metadata);

  // Parse a sched_waking event according to pre-validated format, and buffer
  // the individual fields in the given compact encoding batch.
  // Takes no |end| pointer, so no bounds can be checked against the event's
  // extent here.
  static void ParseSchedWakingCompact(const uint8_t* start,
                                      uint64_t timestamp,
                                      const CompactSchedWakingFormat* format,
                                      CompactSchedBuffer* compact_buf,
                                      FtraceMetadata* metadata);
367 
  // Parses & encodes the given range of contiguous tracing pages. Called by
  // |ReadAndProcessBatch| for each active data source.
  //
  // Returns true if all pages were parsed correctly. In case of parsing
  // errors, they will be recorded in the FtraceEventBundle proto.
  //
  // NOTE(review): |parsing_buf| presumably points at |pages_read| consecutive
  // pages, and |parse_errors| / |bundle_end_timestamp| look like in/out
  // accumulators owned by the caller — confirm in cpu_reader.cc.
  //
  // public and static for testing
  static bool ProcessPagesForDataSource(
      TraceWriter* trace_writer,
      FtraceMetadata* metadata,
      size_t cpu,
      const FtraceDataSourceConfig* ds_config,
      base::FlatSet<protos::pbzero::FtraceParseStatus>* parse_errors,
      uint64_t* bundle_end_timestamp,
      const uint8_t* parsing_buf,
      size_t pages_read,
      CompactSchedBuffer* compact_sched_buf,
      const ProtoTranslationTable* table,
      LazyKernelSymbolizer* symbolizer,
      const FtraceClockSnapshot* ftrace_clock_snapshot,
      protos::pbzero::FtraceClock ftrace_clock);
389 
390   // For FtraceController, which manages poll callbacks on per-cpu buffer fds.
RawBufferFd()391   int RawBufferFd() const { return trace_fd_.get(); }
392 
393  private:
  // Reads at most |max_pages| of ftrace data, parses it, and writes it
  // into |started_data_sources|. Returns number of pages read.
  // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for
  // rationale behind the batching.
  // NOTE(review): |parsing_buf| presumably has room for at least |max_pages|
  // pages, and |first_batch_in_cycle| presumably marks the first call made
  // within one ReadCycle() — confirm in cpu_reader.cc.
  size_t ReadAndProcessBatch(
      uint8_t* parsing_buf,
      size_t max_pages,
      bool first_batch_in_cycle,
      CompactSchedBuffer* compact_sched_buf,
      const std::set<FtraceDataSource*>& started_data_sources);
404 
  // The cpu whose ftrace data this reader handles.
  size_t cpu_;
  // NOTE(review): plain pointers, presumably non-owning — confirm that the
  // pointees outlive this object.
  const ProtoTranslationTable* table_;
  LazyKernelSymbolizer* symbolizer_;
  // The file whose raw fd is returned by RawBufferFd().
  base::ScopedFile trace_fd_;
  // Value-initialized by default.
  protos::pbzero::FtraceClock ftrace_clock_{};
  const FtraceClockSnapshot* ftrace_clock_snapshot_;
411 };
412 
413 }  // namespace perfetto
414 
415 #endif  // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
416