1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/perf_text/perf_text_sample_line_parser.h"
18 
19 #include <cctype>
20 #include <cstddef>
21 #include <cstdint>
22 #include <optional>
23 #include <string>
24 #include <string_view>
25 #include <vector>
26 
27 #include "perfetto/ext/base/string_utils.h"
28 
29 namespace perfetto::trace_processor::perf_text_importer {
30 
31 namespace {
32 
// Returns the suffix of |line| which looks like a timestamp of the form
// "<seconds>.<6 digits>" (e.g. "303057.417513"), or an empty view if |line|
// does not end with one.
//
// Reading backwards from the end of |line|, a match requires:
//  - six ASCII decimal digits,
//  - a '.' immediately before them,
//  - one or more ASCII decimal digits before the '.',
//  - a ' ' immediately before those digits.
//
// The returned view aliases |line| and starts right after that space.
std::string_view FindTsAtEnd(std::string_view line) {
  constexpr size_t kFractionDigits = 6;
  // One seconds digit, the '.', and the fractional digits. Anything shorter
  // cannot match and would make the index arithmetic below underflow.
  constexpr size_t kMinTsSize = 1 + 1 + kFractionDigits;

  // Deliberately not std::isdigit: passing it a negative char (i.e. any
  // non-ASCII byte when char is signed) is undefined behaviour, and the input
  // here is arbitrary trace data.
  const auto all_digits = [](std::string_view s) {
    for (char ch : s) {
      if (ch < '0' || ch > '9') {
        return false;
      }
    }
    return true;
  };

  if (line.size() < kMinTsSize) {
    return {};
  }

  // The '.' must sit exactly |kFractionDigits| + 1 characters from the end
  // and everything after it must be a digit.
  const size_t dot = line.size() - kFractionDigits - 1;
  if (line[dot] != '.' || !all_digits(line.substr(dot + 1))) {
    return {};
  }

  // A space must exist somewhere before the dot: it marks the start of the
  // timestamp.
  const std::string_view until_dot = line.substr(0, dot);
  const size_t space = until_dot.rfind(' ');
  if (space == std::string_view::npos) {
    return {};
  }

  // Everything between that space and the dot is the seconds part: it must be
  // non-empty (".417513" is not a timestamp) and made up only of digits.
  const std::string_view seconds = until_dot.substr(space + 1);
  if (seconds.empty() || !all_digits(seconds)) {
    return {};
  }
  return line.substr(space + 1);
}
66 
67 }  // namespace
68 
ParseSampleLine(std::string_view line)69 std::optional<SampleLine> ParseSampleLine(std::string_view line) {
70   // Example of what we're parsing here:
71   // trace_processor 3962131 303057.417513:          1 cpu_atom/cycles/Pu:
72   //
73   // Find colons and look backwards to find something which looks like a
74   // timestamp. Anything before that is metadata of the sample we may be able
75   // to parse out.
76   for (size_t s = 0, cln = line.find(':', s); cln != std::string_view::npos;
77        s = cln + 1, cln = line.find(':', s)) {
78     std::string_view raw_ts = FindTsAtEnd(line.substr(0, cln));
79     if (raw_ts.empty()) {
80       continue;
81     }
82     std::optional<double> ts = base::StringToDouble(std::string(raw_ts));
83     if (!ts) {
84       continue;
85     }
86     std::string before_ts(line.data(),
87                           static_cast<size_t>(raw_ts.data() - line.data()));
88 
89     // simpleperf puts tabs after the comm while perf puts spaces. Make it
90     // consistent and just use spaces.
91     before_ts = base::ReplaceAll(before_ts, "\t", "  ");
92 
93     std::vector<std::string> pieces = base::SplitString(before_ts, " ");
94     if (pieces.empty()) {
95       continue;
96     }
97 
98     size_t pos = pieces.size() - 1;
99 
100     // Try to parse out the CPU in the form: '[cpu]' (e.g. '[3]').
101     std::optional<uint32_t> cpu;
102     if (base::StartsWith(pieces[pos], "[") &&
103         base::EndsWith(pieces[pos], "]")) {
104       cpu = base::StringToUInt32(pieces[pos].substr(1, pieces[pos].size() - 2));
105       if (!cpu) {
106         continue;
107       }
108       --pos;
109     }
110 
111     // Try to parse out the tid and pid in the form 'pid/tid' (e.g.
112     // '1024/1025'). If there's no '/' then just try to parse it as a tid.
113     std::vector<std::string> pid_and_tid = base::SplitString(pieces[pos], "/");
114     if (pid_and_tid.size() == 0 || pid_and_tid.size() > 2) {
115       continue;
116     }
117 
118     uint32_t tid_idx = pid_and_tid.size() == 1 ? 0 : 1;
119     auto opt_tid = base::StringToUInt32(pid_and_tid[tid_idx]);
120     if (!opt_tid) {
121       continue;
122     }
123     uint32_t tid = *opt_tid;
124 
125     std::optional<uint32_t> pid;
126     if (pid_and_tid.size() == 2) {
127       pid = base::StringToUInt32(pid_and_tid[0]);
128       if (!pid) {
129         continue;
130       }
131     }
132 
133     // All the remaining pieces are the comm which needs to be joined together
134     // with ' '.
135     pieces.resize(pos);
136     std::string comm = base::Join(pieces, " ");
137     return SampleLine{
138         comm, pid, tid, cpu, static_cast<int64_t>(*ts * 1000 * 1000 * 1000),
139     };
140   }
141   return std::nullopt;
142 }
143 
IsPerfTextFormatTrace(const uint8_t * ptr,size_t size)144 bool IsPerfTextFormatTrace(const uint8_t* ptr, size_t size) {
145   std::string_view str(reinterpret_cast<const char*>(ptr), size);
146   size_t nl = str.find('\n');
147   if (nl == std::string_view::npos) {
148     return false;
149   }
150   return ParseSampleLine(str.substr(0, nl)).has_value();
151 }
152 
153 }  // namespace perfetto::trace_processor::perf_text_importer
154