1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/importers/proto/pigweed_detokenizer.h"
18
19 #include <array>
20 #include <cctype>
21 #include <cstring>
22
23 #include "perfetto/ext/base/flat_hash_map.h"
24 #include "perfetto/ext/base/status_or.h"
25 #include "perfetto/ext/base/string_utils.h"
26 #include "perfetto/protozero/field.h"
27
// Value of an entry's date_removed field while the entry is still live.
static constexpr uint32_t kDateRemovedNever = 0xFFFFFFFFu;

// Size, in bytes, of the scratch buffer each argument is formatted into.
static constexpr uint32_t kFormatBufferSize = 32u;

// Bytes a token database must start with: "TOKENS" followed by two zero bytes.
static constexpr std::array<uint8_t, 8> kHeaderPrefix = {
    {'T', 'O', 'K', 'E', 'N', 'S', 0, 0}};
35
// Layout of the fixed-size header at the start of a serialized token
// database. In this file it is used only through sizeof(Header) and
// offsetof(Header, entry_count); the fields themselves are decoded from the
// raw bytes rather than by reading through this struct.
struct Header {
  std::array<char, 6> magic;
  uint16_t version;
  // Number of Entry records that follow the header.
  uint32_t entry_count;
  uint32_t reserved;
};
42
// Layout of one record in the entry table that follows the header. Used for
// sizeof(Entry) and offsetof(Entry, date_removed) when decoding raw bytes.
struct Entry {
  // Token identifying the format string that belongs to this entry.
  uint32_t token;
  // kDateRemovedNever while the entry is live; any other value means the
  // entry is skipped when the database is loaded.
  uint32_t date_removed;
};
47
// Decodes a little-endian 32-bit unsigned integer from the four bytes
// starting at `bytes`. The caller must guarantee that four bytes are readable.
static constexpr uint32_t ReadUint32(const uint8_t* bytes) {
  uint32_t decoded = 0;
  // Walk from the most significant byte (index 3) down to the least.
  for (size_t remaining = 4; remaining-- > 0;) {
    decoded = (decoded << 8) | static_cast<uint32_t>(bytes[remaining]);
  }
  return decoded;
}
54
55 namespace perfetto::trace_processor::pigweed {
56
CreateNullDetokenizer()57 PigweedDetokenizer CreateNullDetokenizer() {
58 return PigweedDetokenizer{base::FlatHashMap<uint32_t, FormatString>()};
59 }
60
CreateDetokenizer(const protozero::ConstBytes & bytes)61 base::StatusOr<PigweedDetokenizer> CreateDetokenizer(
62 const protozero::ConstBytes& bytes) {
63 base::FlatHashMap<uint32_t, FormatString> tokens;
64 // See Pigweed's token_database.h for a description of the format,
65 // but tl;dr we have:
66 //
67 // * Header.
68 // * Array of {token, date_removed} structs.
69 // * Matching table of null-terminated strings.
70
71 if (bytes.size < sizeof(Header)) {
72 return base::ErrStatus("Truncated Pigweed database (no header)");
73 }
74
75 for (size_t i = 0; i < kHeaderPrefix.size(); ++i) {
76 if (bytes.data[i] != kHeaderPrefix[i]) {
77 return base::ErrStatus("Pigweed database has wrong magic");
78 }
79 }
80
81 size_t entry_count = ReadUint32(bytes.data + offsetof(Header, entry_count));
82
83 size_t entry_ix = sizeof(Header);
84 size_t string_ix = sizeof(Header) + entry_count * sizeof(Entry);
85
86 if (string_ix > bytes.size) {
87 return base::ErrStatus("Truncated Pigweed database (no string table)");
88 }
89
90 for (size_t i = 0; i < entry_count; ++i) {
91 uint32_t token = ReadUint32(bytes.data + entry_ix);
92 uint32_t date_removed =
93 ReadUint32(bytes.data + entry_ix + offsetof(Entry, date_removed));
94
95 const uint8_t* next_null_char = static_cast<const uint8_t*>(
96 memchr(bytes.data + string_ix, '\0', bytes.size - string_ix));
97 const size_t next_string_ix =
98 static_cast<size_t>(next_null_char - bytes.data) + 1;
99 if (next_string_ix > bytes.size) {
100 return base::ErrStatus(
101 "Truncated Pigweed database (string table not terminated)");
102 }
103
104 if (date_removed == kDateRemovedNever) {
105 std::string str(reinterpret_cast<const char*>(bytes.data + string_ix));
106
107 tokens[token] = FormatString(str);
108 }
109
110 entry_ix += sizeof(Entry);
111 string_ix = next_string_ix;
112 }
113
114 return PigweedDetokenizer(std::move(tokens));
115 }
116
PigweedDetokenizer(base::FlatHashMap<uint32_t,FormatString> tokens)117 PigweedDetokenizer::PigweedDetokenizer(
118 base::FlatHashMap<uint32_t, FormatString> tokens)
119 : tokens_(std::move(tokens)) {}
120
121 #if defined(__GNUC__) || defined(__clang__)
122 #pragma GCC diagnostic push
123 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
124 #endif // defined(__GNUC__) || defined(__clang__)
125
Detokenize(const protozero::ConstBytes & bytes) const126 base::StatusOr<DetokenizedString> PigweedDetokenizer::Detokenize(
127 const protozero::ConstBytes& bytes) const {
128 if (bytes.size < sizeof(uint32_t)) {
129 return base::ErrStatus("Truncated Pigweed payload");
130 }
131
132 const uint32_t token = ReadUint32(bytes.data);
133
134 FormatString* format = tokens_.Find(token);
135 if (!format) {
136 return DetokenizedString(token,
137 FormatString(std::string("Token not found")));
138 }
139
140 const uint8_t* ptr = bytes.data + sizeof(uint32_t);
141
142 std::vector<std::variant<int64_t, uint64_t, double>> args;
143 std::vector<std::string> args_formatted;
144 for (Arg arg : format->args()) {
145 char buffer[kFormatBufferSize];
146 const char* fmt = arg.format.c_str();
147 size_t formatted_size;
148
149 if (arg.type == kFloat) {
150 if (ptr + sizeof(float) > bytes.data + bytes.size) {
151 return base::ErrStatus("Truncated Pigweed float");
152 }
153
154 float value_float;
155 memcpy(&value_float, ptr, sizeof(value_float));
156 ptr += sizeof(value_float);
157 double value = static_cast<double>(value_float);
158 args.push_back(value);
159 formatted_size =
160 perfetto::base::SprintfTrunc(buffer, kFormatBufferSize, fmt, value);
161 } else {
162 uint64_t raw;
163 auto old_ptr = ptr;
164 ptr = protozero::proto_utils::ParseVarInt(ptr, bytes.data + bytes.size,
165 &raw);
166 if (old_ptr == ptr) {
167 return base::ErrStatus("Truncated Pigweed varint");
168 }
169 // All Pigweed integers (including unsigned) are zigzag encoded.
170 int64_t value = ::protozero::proto_utils::ZigZagDecode(raw);
171 if (arg.type == kSignedInt) {
172 args.push_back(value);
173 formatted_size =
174 perfetto::base::SprintfTrunc(buffer, kFormatBufferSize, fmt, value);
175 } else {
176 uint64_t value_unsigned;
177 memcpy(&value_unsigned, &value, sizeof(value_unsigned));
178 if (arg.type == kUnsigned32) {
179 value_unsigned &= 0xFFFFFFFFu;
180 }
181 args.push_back(value_unsigned);
182 formatted_size = perfetto::base::SprintfTrunc(buffer, kFormatBufferSize,
183 fmt, value_unsigned);
184 }
185 }
186 if (formatted_size == kFormatBufferSize - 1) {
187 return base::ErrStatus("Exceeded buffer size for number");
188 }
189 args_formatted.push_back(std::string(buffer, formatted_size));
190 if (ptr >= bytes.data + bytes.size) {
191 break;
192 }
193 }
194
195 return DetokenizedString(token, *format, args, args_formatted);
196 }
197
198 #if defined(__GNUC__) || defined(__clang__)
199 #pragma GCC diagnostic pop
200 #endif // defined(__GNUC__) || defined(__clang__)
201
DetokenizedString(const uint32_t token,FormatString format_string)202 DetokenizedString::DetokenizedString(const uint32_t token,
203 FormatString format_string)
204 : token_(token), format_string_(std::move(format_string)) {}
205
DetokenizedString(const uint32_t token,FormatString format_string,std::vector<std::variant<int64_t,uint64_t,double>> args,std::vector<std::string> args_formatted)206 DetokenizedString::DetokenizedString(
207 const uint32_t token,
208 FormatString format_string,
209 std::vector<std::variant<int64_t, uint64_t, double>> args,
210 std::vector<std::string> args_formatted)
211 : token_(token),
212 format_string_(format_string),
213 args_(args),
214 args_formatted_(args_formatted) {}
215
Format() const216 std::string DetokenizedString::Format() const {
217 const auto args = format_string_.args();
218 const auto fmt = format_string_.template_str();
219 if (args.size() == 0) {
220 return fmt;
221 }
222
223 std::string result;
224
225 result.append(fmt.substr(0, args[0].begin));
226
227 for (size_t i = 0; i < args.size(); i++) {
228 result.append(args_formatted_[i]);
229 if (i < args.size() - 1) {
230 result.append(fmt.substr(args[i].end, args[i + 1].begin - args[i].end));
231 } else {
232 result.append(fmt.substr(args[i].end, fmt.size() - args[i].end));
233 }
234 }
235
236 return result;
237 }
238
// Returns the index of the first character at or after `ix` in `fmt` that is
// not a printf flag character ('-', '+', '#', ' ' or '0'). Returns `ix`
// unchanged when it is at or beyond the end of `fmt`.
//
// `fmt` is taken by const reference so that the format string is not copied
// on every call.
static size_t SkipFlags(const std::string& fmt, size_t ix) {
  while (ix < fmt.size() &&
         (fmt[ix] == '-' || fmt[ix] == '+' || fmt[ix] == '#' ||
          fmt[ix] == ' ' || fmt[ix] == '0')) {
    ix += 1;
  }
  return ix;
}
246
// Skips a printf field width or precision starting at `ix` in `fmt`: either a
// single '*', or an optional sign ('-' or '+') followed by any number of
// decimal digits. Returns the index of the first character after it, or `ix`
// unchanged when nothing matches or `ix` is at or beyond the end of `fmt`.
//
// `fmt` is taken by const reference so that the format string is not copied
// on every call.
static size_t SkipAsteriskOrInteger(const std::string& fmt, size_t ix) {
  if (ix >= fmt.size()) {
    return ix;
  }
  if (fmt[ix] == '*') {
    return ix + 1;
  }

  if (fmt[ix] == '-' || fmt[ix] == '+') {
    ix += 1;
  }

  // std::isdigit() has undefined behaviour for negative values other than
  // EOF, so convert through unsigned char before calling it.
  while (ix < fmt.size() &&
         std::isdigit(static_cast<unsigned char>(fmt[ix])) != 0) {
    ix += 1;
  }
  return ix;
}
259
// Reads the printf length modifier that starts at `ix` in `fmt`.
//
// Returns the modifier padded with '\0': {'l','l'} or {'h','h'} for the
// two-character forms, {c,'\0'} for a single 'h', 'l', 'j', 'z', 't' or 'L',
// and {'\0','\0'} when there is no modifier or `ix` is at or beyond the end
// of `fmt`.
//
// `fmt` is taken by const reference so that the format string is not copied
// on every call.
static std::array<char, 2> ReadLengthModifier(const std::string& fmt,
                                              size_t ix) {
  if (ix >= fmt.size()) {
    return {};
  }
  const char first = fmt[ix];

  // Check for ll or hh. `ix < fmt.size()` holds here, so `ix + 1` is at most
  // fmt.size(), where std::string yields '\0'; the read stays in bounds.
  if ((first == 'l' || first == 'h') && fmt[ix + 1] == first) {
    return {{first, first}};
  }
  // strchr() also matches the terminating '\0' of the set, so rule out an
  // embedded null character explicitly.
  if (first != '\0' && std::strchr("hljztL", first) != nullptr) {
    return {{first, '\0'}};
  }
  return {};
}
270
FormatString(std::string format)271 FormatString::FormatString(std::string format) : template_str_(format) {
272 size_t fmt_start = 0;
273 for (size_t i = 0; i < format.size(); i++) {
274 if (format[i] == '%') {
275 fmt_start = i;
276 i += 1;
277
278 i = SkipFlags(format, i);
279
280 // Field width.
281 i = SkipAsteriskOrInteger(format, i);
282
283 // Precision.
284 if (format[i] == '.') {
285 i += 1;
286 i = SkipAsteriskOrInteger(format, i);
287 }
288
289 // Length modifier
290 const std::array<char, 2> length = ReadLengthModifier(format, i);
291 i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
292
293 const char spec = format[i];
294 const std::string arg_format =
295 format.substr(fmt_start, i - fmt_start + 1);
296 if (spec == 'c' || spec == 'd' || spec == 'i') {
297 args_.push_back(Arg{kSignedInt, arg_format, fmt_start, i + 1});
298 } else if (strchr("oxXup", spec) != nullptr) {
299 // Size matters for unsigned integers.
300 if (length[0] == 'j' || length[1] == 'l') {
301 args_.push_back(Arg{kUnsigned64, arg_format, fmt_start, i + 1});
302 } else {
303 args_.push_back(Arg{kUnsigned32, arg_format, fmt_start, i + 1});
304 }
305 } else if (strchr("fFeEaAgG", spec) != nullptr) {
306 args_.push_back(Arg{kFloat, arg_format, fmt_start, i + 1});
307 } else {
308 // Parsing failed.
309 // We ignore this silently for now.
310 }
311 }
312 }
313 }
314
315 } // namespace perfetto::trace_processor::pigweed
316