xref: /aosp_15_r20/external/perfetto/src/trace_processor/importers/perf/perf_data_tokenizer.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/perf/perf_data_tokenizer.h"
18 
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstring>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <tuple>
28 #include <utility>
29 #include <vector>
30 
31 #include "perfetto/base/flat_set.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/status.h"
34 #include "perfetto/ext/base/status_or.h"
35 #include "perfetto/public/compiler.h"
36 #include "perfetto/trace_processor/ref_counted.h"
37 #include "perfetto/trace_processor/trace_blob_view.h"
38 #include "protos/perfetto/common/builtin_clock.pbzero.h"
39 #include "protos/perfetto/trace/clock_snapshot.pbzero.h"
40 #include "protos/third_party/simpleperf/record_file.pbzero.h"
41 #include "src/trace_processor/importers/common/clock_tracker.h"
42 #include "src/trace_processor/importers/common/process_tracker.h"
43 #include "src/trace_processor/importers/common/slice_tracker.h"
44 #include "src/trace_processor/importers/perf/attrs_section_reader.h"
45 #include "src/trace_processor/importers/perf/aux_data_tokenizer.h"
46 #include "src/trace_processor/importers/perf/aux_record.h"
47 #include "src/trace_processor/importers/perf/aux_stream_manager.h"
48 #include "src/trace_processor/importers/perf/auxtrace_info_record.h"
49 #include "src/trace_processor/importers/perf/auxtrace_record.h"
50 #include "src/trace_processor/importers/perf/features.h"
51 #include "src/trace_processor/importers/perf/itrace_start_record.h"
52 #include "src/trace_processor/importers/perf/perf_event.h"
53 #include "src/trace_processor/importers/perf/perf_event_attr.h"
54 #include "src/trace_processor/importers/perf/perf_file.h"
55 #include "src/trace_processor/importers/perf/perf_session.h"
56 #include "src/trace_processor/importers/perf/perf_tracker.h"
57 #include "src/trace_processor/importers/perf/reader.h"
58 #include "src/trace_processor/importers/perf/record.h"
59 #include "src/trace_processor/importers/perf/sample_id.h"
60 #include "src/trace_processor/importers/proto/perf_sample_tracker.h"
61 #include "src/trace_processor/sorter/trace_sorter.h"
62 #include "src/trace_processor/storage/stats.h"
63 #include "src/trace_processor/util/build_id.h"
64 #include "src/trace_processor/util/status_macros.h"
65 #include "src/trace_processor/util/trace_blob_view_reader.h"
66 
67 namespace perfetto::trace_processor::perf_importer {
68 namespace {
69 
AddIds(uint8_t id_offset,uint64_t flags,base::FlatSet<uint8_t> & feature_ids)70 void AddIds(uint8_t id_offset,
71             uint64_t flags,
72             base::FlatSet<uint8_t>& feature_ids) {
73   for (size_t i = 0; i < sizeof(flags) * 8; ++i) {
74     if (flags & 1) {
75       feature_ids.insert(id_offset);
76     }
77     flags >>= 1;
78     ++id_offset;
79   }
80 }
81 
ExtractFeatureIds(const uint64_t & flags,const uint64_t (& flags1)[3])82 base::FlatSet<uint8_t> ExtractFeatureIds(const uint64_t& flags,
83                                          const uint64_t (&flags1)[3]) {
84   base::FlatSet<uint8_t> feature_ids;
85   AddIds(0, flags, feature_ids);
86   AddIds(64, flags1[0], feature_ids);
87   AddIds(128, flags1[1], feature_ids);
88   AddIds(192, flags1[2], feature_ids);
89   return feature_ids;
90 }
91 
ReadTime(const Record & record,std::optional<uint64_t> & time)92 bool ReadTime(const Record& record, std::optional<uint64_t>& time) {
93   if (!record.attr) {
94     time = std::nullopt;
95     return true;
96   }
97   Reader reader(record.payload.copy());
98   if (record.header.type != PERF_RECORD_SAMPLE) {
99     std::optional<size_t> offset = record.attr->time_offset_from_end();
100     if (!offset.has_value()) {
101       time = std::nullopt;
102       return true;
103     }
104     if (*offset > reader.size_left()) {
105       return false;
106     }
107     return reader.Skip(reader.size_left() - *offset) &&
108            reader.ReadOptional(time);
109   }
110 
111   std::optional<size_t> offset = record.attr->time_offset_from_start();
112   if (!offset.has_value()) {
113     time = std::nullopt;
114     return true;
115   }
116   return reader.Skip(*offset) && reader.ReadOptional(time);
117 }
118 
119 }  // namespace
120 
PerfDataTokenizer(TraceProcessorContext * ctx)121 PerfDataTokenizer::PerfDataTokenizer(TraceProcessorContext* ctx)
122     : context_(ctx), aux_manager_(ctx) {}
123 
124 PerfDataTokenizer::~PerfDataTokenizer() = default;
125 
126 // A normal perf.data consts of:
127 // [ header ]
128 // [ attr section ]
129 // [ data section ]
130 // [ optional feature sections ]
131 //
132 // Where each "attr" describes one event type recorded in the file.
133 //
134 // Most file format documentation is outdated or misleading, instead see
135 // perf_session__do_write_header() in linux/tools/perf/util/header.c.
Parse(TraceBlobView blob)136 base::Status PerfDataTokenizer::Parse(TraceBlobView blob) {
137   buffer_.PushBack(std::move(blob));
138 
139   base::StatusOr<ParsingResult> result = ParsingResult::kSuccess;
140   while (result.ok() && result.value() != ParsingResult::kMoreDataNeeded) {
141     switch (parsing_state_) {
142       case ParsingState::kParseHeader:
143         result = ParseHeader();
144         break;
145 
146       case ParsingState::kParseAttrs:
147         result = ParseAttrs();
148         break;
149 
150       case ParsingState::kSeekRecords:
151         result = SeekRecords();
152         break;
153 
154       case ParsingState::kParseRecords:
155         result = ParseRecords();
156         break;
157 
158       case ParsingState::kParseAuxtraceData:
159         result = ParseAuxtraceData();
160         break;
161 
162       case ParsingState::kParseFeatures:
163         result = ParseFeatures();
164         break;
165 
166       case ParsingState::kParseFeatureSections:
167         result = ParseFeatureSections();
168         break;
169 
170       case ParsingState::kDone:
171         if (!buffer_.empty()) {
172           return base::ErrStatus("Unexpected data, %zu", buffer_.avail());
173         }
174         return base::OkStatus();
175     }
176   }
177   return result.status();
178 }
179 
180 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseHeader()181 PerfDataTokenizer::ParseHeader() {
182   auto tbv = buffer_.SliceOff(0, sizeof(header_));
183   if (!tbv) {
184     return ParsingResult::kMoreDataNeeded;
185   }
186   PERFETTO_CHECK(Reader(std::move(*tbv)).Read(header_));
187 
188   // TODO: Check for endianess (big endian will have letters reversed);
189   if (memcmp(header_.magic, PerfFile::kPerfMagic,
190              sizeof(PerfFile::kPerfMagic)) != 0) {
191     return base::ErrStatus("Invalid magic string");
192   }
193 
194   if (header_.size != sizeof(PerfFile::Header)) {
195     return base::ErrStatus(
196         "Failed to perf file header size. Expected %zu"
197         ", found %" PRIu64,
198         sizeof(PerfFile::Header), header_.size);
199   }
200 
201   feature_ids_ = ExtractFeatureIds(header_.flags, header_.flags1);
202   feature_headers_section_ = {header_.data.end(),
203                               feature_ids_.size() * sizeof(PerfFile::Section)};
204   context_->clock_tracker->SetTraceTimeClock(
205       protos::pbzero::ClockSnapshot::Clock::MONOTONIC);
206 
207   PERFETTO_CHECK(buffer_.PopFrontUntil(sizeof(PerfFile::Header)));
208   parsing_state_ = ParsingState::kParseAttrs;
209   return ParsingResult::kSuccess;
210 }
211 
212 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseAttrs()213 PerfDataTokenizer::ParseAttrs() {
214   std::optional<TraceBlobView> tbv =
215       buffer_.SliceOff(header_.attrs.offset, header_.attrs.size);
216   if (!tbv) {
217     return ParsingResult::kMoreDataNeeded;
218   }
219 
220   ASSIGN_OR_RETURN(AttrsSectionReader attr_reader,
221                    AttrsSectionReader::Create(header_, std::move(*tbv)));
222 
223   PerfSession::Builder builder(context_);
224   while (attr_reader.CanReadNext()) {
225     PerfFile::AttrsEntry entry;
226     RETURN_IF_ERROR(attr_reader.ReadNext(entry));
227 
228     if (entry.ids.size % sizeof(uint64_t) != 0) {
229       return base::ErrStatus("Invalid id section size: %" PRIu64,
230                              entry.ids.size);
231     }
232 
233     tbv = buffer_.SliceOff(entry.ids.offset, entry.ids.size);
234     if (!tbv) {
235       return ParsingResult::kMoreDataNeeded;
236     }
237 
238     std::vector<uint64_t> ids(entry.ids.size / sizeof(uint64_t));
239     PERFETTO_CHECK(Reader(std::move(*tbv)).ReadVector(ids));
240     builder.AddAttrAndIds(entry.attr, std::move(ids));
241   }
242 
243   ASSIGN_OR_RETURN(perf_session_, builder.Build());
244   if (perf_session_->HasPerfClock()) {
245     context_->clock_tracker->SetTraceTimeClock(
246         protos::pbzero::BUILTIN_CLOCK_PERF);
247   }
248   parsing_state_ = ParsingState::kSeekRecords;
249   return ParsingResult::kSuccess;
250 }
251 
252 base::StatusOr<PerfDataTokenizer::ParsingResult>
SeekRecords()253 PerfDataTokenizer::SeekRecords() {
254   if (!buffer_.PopFrontUntil(header_.data.offset)) {
255     return ParsingResult::kMoreDataNeeded;
256   }
257   parsing_state_ = ParsingState::kParseRecords;
258   return ParsingResult::kSuccess;
259 }
260 
261 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseRecords()262 PerfDataTokenizer::ParseRecords() {
263   while (buffer_.start_offset() < header_.data.end()) {
264     Record record;
265 
266     if (auto res = ParseRecord(record);
267         !res.ok() || *res != ParsingResult::kSuccess) {
268       return res;
269     }
270 
271     if (record.header.type == PERF_RECORD_AUXTRACE) {
272       PERFETTO_CHECK(!current_auxtrace_.has_value());
273       current_auxtrace_.emplace();
274       RETURN_IF_ERROR(current_auxtrace_->Parse(record));
275       parsing_state_ = ParsingState::kParseAuxtraceData;
276       return ParsingResult::kSuccess;
277     }
278 
279     RETURN_IF_ERROR(ProcessRecord(std::move(record)));
280   }
281 
282   RETURN_IF_ERROR(aux_manager_.FinalizeStreams());
283 
284   parsing_state_ = ParsingState::kParseFeatureSections;
285   return ParsingResult::kSuccess;
286 }
287 
ProcessRecord(Record record)288 base::Status PerfDataTokenizer::ProcessRecord(Record record) {
289   const uint32_t type = record.header.type;
290   switch (type) {
291     case PERF_RECORD_AUXTRACE:
292       PERFETTO_FATAL("Unreachable");
293 
294     case PERF_RECORD_AUXTRACE_INFO:
295       return ProcessAuxtraceInfoRecord(std::move(record));
296 
297     case PERF_RECORD_AUX:
298       return ProcessAuxRecord(std::move(record));
299 
300     case PERF_RECORD_TIME_CONV:
301       return ProcessTimeConvRecord(std::move(record));
302 
303     case PERF_RECORD_ITRACE_START:
304       return ProcessItraceStartRecord(std::move(record));
305 
306     default:
307       MaybePushRecord(std::move(record));
308       return base::OkStatus();
309   }
310 }
311 
ParseRecord(Record & record)312 base::StatusOr<PerfDataTokenizer::ParsingResult> PerfDataTokenizer::ParseRecord(
313     Record& record) {
314   record.session = perf_session_;
315   std::optional<TraceBlobView> tbv =
316       buffer_.SliceOff(buffer_.start_offset(), sizeof(record.header));
317   if (!tbv) {
318     return ParsingResult::kMoreDataNeeded;
319   }
320   PERFETTO_CHECK(Reader(std::move(*tbv)).Read(record.header));
321 
322   if (record.header.size < sizeof(record.header)) {
323     return base::ErrStatus("Invalid record size: %" PRIu16, record.header.size);
324   }
325 
326   tbv = buffer_.SliceOff(buffer_.start_offset() + sizeof(record.header),
327                          record.header.size - sizeof(record.header));
328   if (!tbv) {
329     return ParsingResult::kMoreDataNeeded;
330   }
331 
332   record.payload = std::move(*tbv);
333 
334   base::StatusOr<RefPtr<PerfEventAttr>> attr =
335       perf_session_->FindAttrForRecord(record.header, record.payload);
336   if (!attr.ok()) {
337     return base::ErrStatus("Unable to determine perf_event_attr for record. %s",
338                            attr.status().c_message());
339   }
340   record.attr = *attr;
341 
342   buffer_.PopFrontBytes(record.header.size);
343   return ParsingResult::kSuccess;
344 }
345 
ExtractTraceTimestamp(const Record & record)346 base::StatusOr<int64_t> PerfDataTokenizer::ExtractTraceTimestamp(
347     const Record& record) {
348   std::optional<uint64_t> time;
349   if (!ReadTime(record, time)) {
350     return base::ErrStatus("Failed to read time");
351   }
352 
353   base::StatusOr<int64_t> trace_ts =
354       time.has_value()
355           ? context_->clock_tracker->ToTraceTime(record.attr->clock_id(),
356                                                  static_cast<int64_t>(*time))
357           : std::min(latest_timestamp_, context_->sorter->max_timestamp());
358 
359   if (PERFETTO_LIKELY(trace_ts.ok())) {
360     latest_timestamp_ = std::max(latest_timestamp_, *trace_ts);
361   }
362 
363   return trace_ts;
364 }
MaybePushRecord(Record record)365 void PerfDataTokenizer::MaybePushRecord(Record record) {
366   base::StatusOr<int64_t> trace_ts = ExtractTraceTimestamp(record);
367   if (!trace_ts.ok()) {
368     context_->storage->IncrementIndexedStats(
369         stats::perf_record_skipped, static_cast<int>(record.header.type));
370     return;
371   }
372   context_->sorter->PushPerfRecord(*trace_ts, std::move(record));
373 }
374 
375 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseFeatureSections()376 PerfDataTokenizer::ParseFeatureSections() {
377   PERFETTO_CHECK(buffer_.start_offset() == header_.data.end());
378   auto tbv = buffer_.SliceOff(feature_headers_section_.offset,
379                               feature_headers_section_.size);
380   if (!tbv) {
381     return ParsingResult::kMoreDataNeeded;
382   }
383 
384   Reader reader(std::move(*tbv));
385   for (auto feature_id : feature_ids_) {
386     feature_sections_.emplace_back(std::piecewise_construct,
387                                    std::forward_as_tuple(feature_id),
388                                    std::forward_as_tuple());
389     PERFETTO_CHECK(reader.Read(feature_sections_.back().second));
390   }
391 
392   std::sort(feature_sections_.begin(), feature_sections_.end(),
393             [](const std::pair<uint8_t, PerfFile::Section>& lhs,
394                const std::pair<uint8_t, PerfFile::Section>& rhs) {
395               if (lhs.second.offset == rhs.second.offset) {
396                 // Some sections have 0 length and thus there can be offset
397                 // collisions. To make sure we parse sections by increasing
398                 // offset parse empty sections first.
399                 return lhs.second.size > rhs.second.size;
400               }
401               return lhs.second.offset > rhs.second.offset;
402             });
403 
404   buffer_.PopFrontUntil(feature_headers_section_.end());
405   parsing_state_ = feature_sections_.empty() ? ParsingState::kDone
406                                              : ParsingState::kParseFeatures;
407   return ParsingResult::kSuccess;
408 }
409 
410 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseFeatures()411 PerfDataTokenizer::ParseFeatures() {
412   while (!feature_sections_.empty()) {
413     const auto feature_id = feature_sections_.back().first;
414     const auto& section = feature_sections_.back().second;
415     auto tbv = buffer_.SliceOff(section.offset, section.size);
416     if (!tbv) {
417       return ParsingResult::kMoreDataNeeded;
418     }
419 
420     RETURN_IF_ERROR(ParseFeature(feature_id, std::move(*tbv)));
421     buffer_.PopFrontUntil(section.end());
422     feature_sections_.pop_back();
423   }
424 
425   parsing_state_ = ParsingState::kDone;
426   return ParsingResult::kSuccess;
427 }
428 
ParseFeature(uint8_t feature_id,TraceBlobView data)429 base::Status PerfDataTokenizer::ParseFeature(uint8_t feature_id,
430                                              TraceBlobView data) {
431   switch (feature_id) {
432     case feature::ID_CMD_LINE: {
433       ASSIGN_OR_RETURN(std::vector<std::string> args,
434                        feature::ParseCmdline(std::move(data)));
435       perf_session_->SetCmdline(args);
436       return base::OkStatus();
437     }
438 
439     case feature::ID_EVENT_DESC:
440       return feature::EventDescription::Parse(
441           std::move(data), [&](feature::EventDescription desc) {
442             for (auto id : desc.ids) {
443               perf_session_->SetEventName(id, std::move(desc.event_string));
444             }
445             return base::OkStatus();
446           });
447 
448     case feature::ID_BUILD_ID:
449       return feature::BuildId::Parse(
450           std::move(data), [&](feature::BuildId build_id) {
451             perf_session_->AddBuildId(
452                 build_id.pid, std::move(build_id.filename),
453                 BuildId::FromRaw(std::move(build_id.build_id)));
454             return base::OkStatus();
455           });
456 
457     case feature::ID_GROUP_DESC: {
458       feature::HeaderGroupDesc group_desc;
459       RETURN_IF_ERROR(
460           feature::HeaderGroupDesc::Parse(std::move(data), group_desc));
461       // TODO(carlscab): Do someting
462       break;
463     }
464 
465     case feature::ID_SIMPLEPERF_META_INFO: {
466       perf_session_->SetIsSimpleperf();
467       feature::SimpleperfMetaInfo meta_info;
468       RETURN_IF_ERROR(feature::SimpleperfMetaInfo::Parse(data, meta_info));
469       for (auto it = meta_info.event_type_info.GetIterator(); it; ++it) {
470         perf_session_->SetEventName(it.key().type, it.key().config, it.value());
471       }
472       break;
473     }
474     case feature::ID_SIMPLEPERF_FILE2: {
475       perf_session_->SetIsSimpleperf();
476       RETURN_IF_ERROR(feature::ParseSimpleperfFile2(
477           std::move(data), [&](TraceBlobView blob) {
478             third_party::simpleperf::proto::pbzero::FileFeature::Decoder file(
479                 blob.data(), blob.length());
480             PerfTracker::GetOrCreate(context_)->AddSimpleperfFile2(file);
481           }));
482 
483       break;
484     }
485     default:
486       context_->storage->IncrementIndexedStats(stats::perf_features_skipped,
487                                                feature_id);
488   }
489 
490   return base::OkStatus();
491 }
492 
ProcessAuxtraceInfoRecord(Record record)493 base::Status PerfDataTokenizer::ProcessAuxtraceInfoRecord(Record record) {
494   AuxtraceInfoRecord auxtrace_info;
495   RETURN_IF_ERROR(auxtrace_info.Parse(record));
496   return aux_manager_.OnAuxtraceInfoRecord(std::move(auxtrace_info));
497 }
498 
ProcessAuxRecord(Record record)499 base::Status PerfDataTokenizer::ProcessAuxRecord(Record record) {
500   AuxRecord aux;
501   RETURN_IF_ERROR(aux.Parse(record));
502   return aux_manager_.OnAuxRecord(std::move(aux));
503 }
504 
ProcessTimeConvRecord(Record record)505 base::Status PerfDataTokenizer::ProcessTimeConvRecord(Record record) {
506   Reader reader(std::move(record.payload));
507   TimeConvRecord time_conv;
508   if (!reader.Read(time_conv)) {
509     return base::ErrStatus("Failed to parse PERF_RECORD_TIME_CONV");
510   }
511 
512   return aux_manager_.OnTimeConvRecord(std::move(time_conv));
513 }
514 
515 base::StatusOr<PerfDataTokenizer::ParsingResult>
ParseAuxtraceData()516 PerfDataTokenizer::ParseAuxtraceData() {
517   PERFETTO_CHECK(current_auxtrace_.has_value());
518   const uint64_t size = current_auxtrace_->size;
519   if (buffer_.avail() < size) {
520     return ParsingResult::kMoreDataNeeded;
521   }
522 
523   // TODO(carlscab): We could make this more efficient and avoid the copies by
524   // passing several chunks instead.
525   std::optional<TraceBlobView> data =
526       buffer_.SliceOff(buffer_.start_offset(), size);
527   buffer_.PopFrontBytes(size);
528   PERFETTO_CHECK(data.has_value());
529   base::Status status = aux_manager_.OnAuxtraceRecord(
530       std::move(*current_auxtrace_), std::move(*data));
531   current_auxtrace_.reset();
532   parsing_state_ = ParsingState::kParseRecords;
533   RETURN_IF_ERROR(status);
534   return ParseRecords();
535 }
536 
ProcessItraceStartRecord(Record record)537 base::Status PerfDataTokenizer::ProcessItraceStartRecord(Record record) {
538   ItraceStartRecord start;
539   RETURN_IF_ERROR(start.Parse(record));
540   context_->process_tracker->UpdateThread(start.tid, start.pid);
541   aux_manager_.OnItraceStartRecord(std::move(start));
542   MaybePushRecord(std::move(record));
543   return base::OkStatus();
544 }
545 
NotifyEndOfFile()546 base::Status PerfDataTokenizer::NotifyEndOfFile() {
547   if (parsing_state_ != ParsingState::kDone) {
548     return base::ErrStatus("Premature end of perf file.");
549   }
550   return base::OkStatus();
551 }
552 
553 }  // namespace perfetto::trace_processor::perf_importer
554