1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 19 20 #include <string.h> 21 #include <cstdint> 22 23 #include <optional> 24 #include <set> 25 26 #include "perfetto/ext/base/paged_memory.h" 27 #include "perfetto/ext/base/scoped_file.h" 28 #include "perfetto/ext/base/utils.h" 29 #include "perfetto/ext/traced/data_source_types.h" 30 #include "perfetto/ext/tracing/core/trace_writer.h" 31 #include "perfetto/protozero/message.h" 32 #include "perfetto/protozero/message_handle.h" 33 #include "src/traced/probes/ftrace/compact_sched.h" 34 #include "src/traced/probes/ftrace/ftrace_metadata.h" 35 36 #include "protos/perfetto/trace/trace_packet.pbzero.h" 37 38 namespace perfetto { 39 40 class FtraceDataSource; 41 class LazyKernelSymbolizer; 42 class ProtoTranslationTable; 43 struct FtraceClockSnapshot; 44 struct FtraceDataSourceConfig; 45 46 namespace protos { 47 namespace pbzero { 48 class FtraceEventBundle; 49 enum FtraceClock : int32_t; 50 enum FtraceParseStatus : int32_t; 51 } // namespace pbzero 52 } // namespace protos 53 54 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto 55 // tracing buffers. 56 class CpuReader { 57 public: 58 // Buffers used when parsing a chunk of ftrace data, allocated by 59 // FtraceController and repeatedly reused by all CpuReaders: 60 // * paged memory into which we read raw ftrace data. 61 // * buffers to accumulate and emit scheduling data in a structure-of-arrays 62 // format (packed proto fields). 63 class ParsingBuffers { 64 public: AllocateIfNeeded()65 void AllocateIfNeeded() { 66 // PagedMemory stays valid as long as it was allocated once. 67 if (!ftrace_data_.IsValid()) { 68 ftrace_data_ = base::PagedMemory::Allocate(base::GetSysPageSize() * 69 kFtraceDataBufSizePages); 70 } 71 // Heap-allocated buffer gets freed and reallocated. 72 if (!compact_sched_) { 73 compact_sched_ = std::make_unique<CompactSchedBuffer>(); 74 } 75 } 76 Release()77 void Release() { 78 if (ftrace_data_.IsValid()) { 79 ftrace_data_.AdviseDontNeed(ftrace_data_.Get(), ftrace_data_.size()); 80 } 81 compact_sched_.reset(); 82 } 83 84 private: 85 friend class CpuReader; 86 // When reading and parsing data for a particular cpu, we do it in batches 87 // of this many pages. In other words, we'll read up to 88 // |kFtraceDataBufSizePages| into memory, parse them, and then repeat if we 89 // still haven't caught up to the writer. 90 static constexpr size_t kFtraceDataBufSizePages = 32; 91 ftrace_data_buf()92 uint8_t* ftrace_data_buf() const { 93 return reinterpret_cast<uint8_t*>(ftrace_data_.Get()); 94 } ftrace_data_buf_pages()95 size_t ftrace_data_buf_pages() const { 96 PERFETTO_DCHECK(ftrace_data_.size() == 97 base::GetSysPageSize() * kFtraceDataBufSizePages); 98 return kFtraceDataBufSizePages; 99 } compact_sched_buf()100 CompactSchedBuffer* compact_sched_buf() const { 101 return compact_sched_.get(); 102 } 103 104 base::PagedMemory ftrace_data_; 105 std::unique_ptr<CompactSchedBuffer> compact_sched_; 106 }; 107 108 // Facilitates lazy proto writing - not every event in the kernel ring buffer 109 // is serialised in the trace, so this class allows for trace packets to be 110 // written only if there's at least one relevant event in the ring buffer 111 // batch. Public for testing. 112 class Bundler { 113 public: Bundler(TraceWriter * trace_writer,FtraceMetadata * metadata,LazyKernelSymbolizer * symbolizer,size_t cpu,const FtraceClockSnapshot * ftrace_clock_snapshot,protos::pbzero::FtraceClock ftrace_clock,CompactSchedBuffer * compact_sched_buf,bool compact_sched_enabled,uint64_t previous_bundle_end_ts)114 Bundler(TraceWriter* trace_writer, 115 FtraceMetadata* metadata, 116 LazyKernelSymbolizer* symbolizer, 117 size_t cpu, 118 const FtraceClockSnapshot* ftrace_clock_snapshot, 119 protos::pbzero::FtraceClock ftrace_clock, 120 CompactSchedBuffer* compact_sched_buf, 121 bool compact_sched_enabled, 122 uint64_t previous_bundle_end_ts) 123 : trace_writer_(trace_writer), 124 metadata_(metadata), 125 symbolizer_(symbolizer), 126 cpu_(cpu), 127 ftrace_clock_snapshot_(ftrace_clock_snapshot), 128 ftrace_clock_(ftrace_clock), 129 compact_sched_enabled_(compact_sched_enabled), 130 compact_sched_buf_(compact_sched_buf), 131 initial_previous_bundle_end_ts_(previous_bundle_end_ts) { 132 if (compact_sched_enabled_) 133 compact_sched_buf_->Reset(); 134 } 135 ~Bundler()136 ~Bundler() { FinalizeAndRunSymbolizer(); } 137 GetOrCreateBundle()138 protos::pbzero::FtraceEventBundle* GetOrCreateBundle() { 139 if (!bundle_) { 140 StartNewPacket(false, initial_previous_bundle_end_ts_); 141 } 142 return bundle_; 143 } 144 145 // Forces the creation of a new TracePacket. 146 void StartNewPacket(bool lost_events, 147 uint64_t previous_bundle_end_timestamp); 148 149 // This function is called after the contents of a FtraceBundle are written. 150 void FinalizeAndRunSymbolizer(); 151 compact_sched_buf()152 CompactSchedBuffer* compact_sched_buf() { 153 // FinalizeAndRunSymbolizer will only process the compact_sched_buf_ if 154 // there is an open bundle. 155 GetOrCreateBundle(); 156 return compact_sched_buf_; 157 } 158 159 private: 160 TraceWriter* const trace_writer_; // Never nullptr. 161 FtraceMetadata* const metadata_; // Never nullptr. 162 LazyKernelSymbolizer* const symbolizer_; // Can be nullptr. 163 const size_t cpu_; 164 const FtraceClockSnapshot* const ftrace_clock_snapshot_; 165 protos::pbzero::FtraceClock const ftrace_clock_; 166 const bool compact_sched_enabled_; 167 CompactSchedBuffer* const compact_sched_buf_; 168 uint64_t initial_previous_bundle_end_ts_; 169 170 TraceWriter::TracePacketHandle packet_; 171 protos::pbzero::FtraceEventBundle* bundle_ = nullptr; 172 }; 173 174 struct PageHeader { 175 uint64_t timestamp; 176 uint64_t size; 177 bool lost_events; 178 }; 179 180 CpuReader(size_t cpu, 181 base::ScopedFile trace_fd, 182 const ProtoTranslationTable* table, 183 LazyKernelSymbolizer* symbolizer, 184 protos::pbzero::FtraceClock ftrace_clock, 185 const FtraceClockSnapshot* ftrace_clock_snapshot); 186 ~CpuReader(); 187 188 // move-only 189 CpuReader(const CpuReader&) = delete; 190 CpuReader& operator=(const CpuReader&) = delete; 191 CpuReader(CpuReader&&) = default; 192 CpuReader& operator=(CpuReader&&) = default; 193 194 // Reads and parses all ftrace data for this cpu (in batches), until we catch 195 // up to the writer, or hit |max_pages|. Returns number of pages read. 196 size_t ReadCycle(ParsingBuffers* parsing_bufs, 197 size_t max_pages, 198 const std::set<FtraceDataSource*>& started_data_sources); 199 200 template <typename T> ReadAndAdvance(const uint8_t ** ptr,const uint8_t * end,T * out)201 static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) { 202 if (*ptr > end - sizeof(T)) 203 return false; 204 memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr), 205 sizeof(T)); 206 *ptr += sizeof(T); 207 return true; 208 } 209 210 // Caller must do the bounds check: 211 // [start + offset, start + offset + sizeof(T)) 212 // Returns the raw value not the varint. 213 template <typename T> ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)214 static T ReadIntoVarInt(const uint8_t* start, 215 uint32_t field_id, 216 protozero::Message* out) { 217 T t; 218 memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T)); 219 out->AppendVarInt<T>(field_id, t); 220 return t; 221 } 222 223 template <typename T> ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)224 static void ReadInode(const uint8_t* start, 225 uint32_t field_id, 226 protozero::Message* out, 227 FtraceMetadata* metadata) { 228 T t = ReadIntoVarInt<T>(start, field_id, out); 229 metadata->AddInode(static_cast<Inode>(t)); 230 } 231 232 template <typename T> ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)233 static void ReadDevId(const uint8_t* start, 234 uint32_t field_id, 235 protozero::Message* out, 236 FtraceMetadata* metadata) { 237 T t; 238 memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T)); 239 BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t); 240 out->AppendVarInt<BlockDeviceID>(field_id, dev_id); 241 metadata->AddDevice(dev_id); 242 } 243 244 template <typename T> ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)245 static void ReadSymbolAddr(const uint8_t* start, 246 uint32_t field_id, 247 protozero::Message* out, 248 FtraceMetadata* metadata) { 249 // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout 250 // via traces, we put in the trace only a mangled address (which really is 251 // the insertion order into metadata.kernel_addrs). We don't care about the 252 // actual symbol addesses. We just need to match that against the symbol 253 // name in the names in the FtraceEventBundle.KernelSymbols. 254 T full_addr; 255 memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T)); 256 uint32_t interned_index = metadata->AddSymbolAddr(full_addr); 257 out->AppendVarInt(field_id, interned_index); 258 } 259 ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)260 static void ReadPid(const uint8_t* start, 261 uint32_t field_id, 262 protozero::Message* out, 263 FtraceMetadata* metadata) { 264 int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out); 265 metadata->AddPid(pid); 266 } 267 ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)268 static void ReadCommonPid(const uint8_t* start, 269 uint32_t field_id, 270 protozero::Message* out, 271 FtraceMetadata* metadata) { 272 int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out); 273 metadata->AddCommonPid(pid); 274 } 275 276 // Internally the kernel stores device ids in a different layout to that 277 // exposed to userspace via stat etc. There's no userspace function to convert 278 // between the formats so we have to do it ourselves. 279 template <typename T> TranslateBlockDeviceIDToUserspace(T kernel_dev)280 static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) { 281 // Provided search index s_dev from 282 // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404 283 // Convert to user space id using 284 // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10 285 // TODO(azappone): see if this is the same on all platforms 286 uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20; 287 uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1); 288 return static_cast<BlockDeviceID>( // From makedev() 289 ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) | 290 ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL))); 291 } 292 293 // Returns a parsed representation of the given raw ftrace page's header. 294 static std::optional<CpuReader::PageHeader> ParsePageHeader( 295 const uint8_t** ptr, 296 uint16_t page_header_size_len); 297 298 // Parse the payload of a raw ftrace page, and write the events as protos 299 // into the provided bundle (and/or compact buffer). 300 // |table| contains the mix of compile time (e.g. proto field ids) and 301 // run time (e.g. field offset and size) information necessary to do this. 302 // The table is initialized once at start time by the ftrace controller 303 // which passes it to the CpuReader which passes it here. 304 // The caller is responsible for validating that the page_header->size stays 305 // within the current page. 306 static protos::pbzero::FtraceParseStatus ParsePagePayload( 307 const uint8_t* start_of_payload, 308 const PageHeader* page_header, 309 const ProtoTranslationTable* table, 310 const FtraceDataSourceConfig* ds_config, 311 Bundler* bundler, 312 FtraceMetadata* metadata, 313 uint64_t* bundle_end_timestamp); 314 315 // Parse a single raw ftrace event beginning at |start| and ending at |end| 316 // and write it into the provided bundle as a proto. 317 // |table| contains the mix of compile time (e.g. proto field ids) and 318 // run time (e.g. field offset and size) information necessary to do this. 319 // The table is initialized once at start time by the ftrace controller 320 // which passes it to the CpuReader which passes it to ParsePage which 321 // passes it here. 322 static bool ParseEvent(uint16_t ftrace_event_id, 323 const uint8_t* start, 324 const uint8_t* end, 325 const ProtoTranslationTable* table, 326 const FtraceDataSourceConfig* ds_config, 327 protozero::Message* message, 328 FtraceMetadata* metadata); 329 330 static bool ParseField(const Field& field, 331 const uint8_t* start, 332 const uint8_t* end, 333 const ProtoTranslationTable* table, 334 protozero::Message* message, 335 FtraceMetadata* metadata); 336 337 // Parse a sys_enter event according to the pre-validated expected format 338 static bool ParseSysEnter(const Event& info, 339 const uint8_t* start, 340 const uint8_t* end, 341 protozero::Message* message, 342 FtraceMetadata* metadata); 343 344 // Parse a sys_exit event according to the pre-validated expected format 345 static bool ParseSysExit(const Event& info, 346 const uint8_t* start, 347 const uint8_t* end, 348 const FtraceDataSourceConfig* ds_config, 349 protozero::Message* message, 350 FtraceMetadata* metadata); 351 352 // Parse a sched_switch event according to pre-validated format, and buffer 353 // the individual fields in the given compact encoding batch. 354 static void ParseSchedSwitchCompact(const uint8_t* start, 355 uint64_t timestamp, 356 const CompactSchedSwitchFormat* format, 357 CompactSchedBuffer* compact_buf, 358 FtraceMetadata* metadata); 359 360 // Parse a sched_waking event according to pre-validated format, and buffer 361 // the individual fields in the given compact encoding batch. 362 static void ParseSchedWakingCompact(const uint8_t* start, 363 uint64_t timestamp, 364 const CompactSchedWakingFormat* format, 365 CompactSchedBuffer* compact_buf, 366 FtraceMetadata* metadata); 367 368 // Parses & encodes the given range of contiguous tracing pages. Called by 369 // |ReadAndProcessBatch| for each active data source. 370 // 371 // Returns true if all pages were parsed correctly. In case of parsing 372 // errors, they will be recorded in the FtraceEventBundle proto. 373 // 374 // public and static for testing 375 static bool ProcessPagesForDataSource( 376 TraceWriter* trace_writer, 377 FtraceMetadata* metadata, 378 size_t cpu, 379 const FtraceDataSourceConfig* ds_config, 380 base::FlatSet<protos::pbzero::FtraceParseStatus>* parse_errors, 381 uint64_t* bundle_end_timestamp, 382 const uint8_t* parsing_buf, 383 size_t pages_read, 384 CompactSchedBuffer* compact_sched_buf, 385 const ProtoTranslationTable* table, 386 LazyKernelSymbolizer* symbolizer, 387 const FtraceClockSnapshot* ftrace_clock_snapshot, 388 protos::pbzero::FtraceClock ftrace_clock); 389 390 // For FtraceController, which manages poll callbacks on per-cpu buffer fds. RawBufferFd()391 int RawBufferFd() const { return trace_fd_.get(); } 392 393 private: 394 // Reads at most |max_pages| of ftrace data, parses it, and writes it 395 // into |started_data_sources|. Returns number of pages read. 396 // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for 397 // rationale behind the batching. 398 size_t ReadAndProcessBatch( 399 uint8_t* parsing_buf, 400 size_t max_pages, 401 bool first_batch_in_cycle, 402 CompactSchedBuffer* compact_sched_buf, 403 const std::set<FtraceDataSource*>& started_data_sources); 404 405 size_t cpu_; 406 const ProtoTranslationTable* table_; 407 LazyKernelSymbolizer* symbolizer_; 408 base::ScopedFile trace_fd_; 409 protos::pbzero::FtraceClock ftrace_clock_{}; 410 const FtraceClockSnapshot* ftrace_clock_snapshot_; 411 }; 412 413 } // namespace perfetto 414 415 #endif // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 416