xref: /aosp_15_r20/external/perfetto/src/trace_processor/importers/proto/pigweed_detokenizer.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/proto/pigweed_detokenizer.h"
18 
19 #include <array>
20 #include <cctype>
21 #include <cstring>
22 
23 #include "perfetto/ext/base/flat_hash_map.h"
24 #include "perfetto/ext/base/status_or.h"
25 #include "perfetto/ext/base/string_utils.h"
26 #include "perfetto/protozero/field.h"
27 
// Sentinel stored in Entry::date_removed for a token that is still live
// (i.e. it has never been removed from the database).
static constexpr uint32_t kDateRemovedNever = UINT32_MAX;

// Size, in bytes, of the scratch buffer used to format a single argument.
static constexpr uint32_t kFormatBufferSize = 32;

// Expected first eight bytes of a token database: the "TOKENS" magic followed
// by two zero bytes (these overlay Header::magic and Header::version).
static constexpr std::array<uint8_t, 8> kHeaderPrefix = {
    'T', 'O', 'K', 'E', 'N', 'S', '\0', '\0'};
35 
// Layout of the fixed-size header at the start of a token database.
// Only used for sizeof()/offsetof() computations in this file; the bytes
// themselves are read through ReadUint32() rather than through this struct.
struct Header {
  std::array<char, 6> magic;  // Matched (together with `version`) against
                              // kHeaderPrefix.
  uint16_t version;
  uint32_t entry_count;       // Number of Entry records following the header.
  uint32_t reserved;
};
42 
// Layout of one record in the entry table that follows the header.
// Only used for sizeof()/offsetof() computations in this file.
struct Entry {
  uint32_t token;         // Token identifying the format string.
  uint32_t date_removed;  // kDateRemovedNever when the entry is live.
};
47 
// Decodes a little-endian 32-bit unsigned integer from the four bytes starting
// at `bytes`. The caller must guarantee that bytes[0..3] are readable.
static constexpr uint32_t ReadUint32(const uint8_t* bytes) {
  uint32_t result = 0;
  // Fold from the most significant byte (index 3) down to the least.
  for (int index = 3; index >= 0; --index) {
    result = (result << 8) | static_cast<uint32_t>(bytes[index]);
  }
  return result;
}
54 
55 namespace perfetto::trace_processor::pigweed {
56 
CreateNullDetokenizer()57 PigweedDetokenizer CreateNullDetokenizer() {
58   return PigweedDetokenizer{base::FlatHashMap<uint32_t, FormatString>()};
59 }
60 
CreateDetokenizer(const protozero::ConstBytes & bytes)61 base::StatusOr<PigweedDetokenizer> CreateDetokenizer(
62     const protozero::ConstBytes& bytes) {
63   base::FlatHashMap<uint32_t, FormatString> tokens;
64   // See Pigweed's token_database.h for a description of the format,
65   // but tl;dr we have:
66   //
67   // * Header.
68   // * Array of {token, date_removed} structs.
69   // * Matching table of null-terminated strings.
70 
71   if (bytes.size < sizeof(Header)) {
72     return base::ErrStatus("Truncated Pigweed database (no header)");
73   }
74 
75   for (size_t i = 0; i < kHeaderPrefix.size(); ++i) {
76     if (bytes.data[i] != kHeaderPrefix[i]) {
77       return base::ErrStatus("Pigweed database has wrong magic");
78     }
79   }
80 
81   size_t entry_count = ReadUint32(bytes.data + offsetof(Header, entry_count));
82 
83   size_t entry_ix = sizeof(Header);
84   size_t string_ix = sizeof(Header) + entry_count * sizeof(Entry);
85 
86   if (string_ix > bytes.size) {
87     return base::ErrStatus("Truncated Pigweed database (no string table)");
88   }
89 
90   for (size_t i = 0; i < entry_count; ++i) {
91     uint32_t token = ReadUint32(bytes.data + entry_ix);
92     uint32_t date_removed =
93         ReadUint32(bytes.data + entry_ix + offsetof(Entry, date_removed));
94 
95     const uint8_t* next_null_char = static_cast<const uint8_t*>(
96         memchr(bytes.data + string_ix, '\0', bytes.size - string_ix));
97     const size_t next_string_ix =
98         static_cast<size_t>(next_null_char - bytes.data) + 1;
99     if (next_string_ix > bytes.size) {
100       return base::ErrStatus(
101           "Truncated Pigweed database (string table not terminated)");
102     }
103 
104     if (date_removed == kDateRemovedNever) {
105       std::string str(reinterpret_cast<const char*>(bytes.data + string_ix));
106 
107       tokens[token] = FormatString(str);
108     }
109 
110     entry_ix += sizeof(Entry);
111     string_ix = next_string_ix;
112   }
113 
114   return PigweedDetokenizer(std::move(tokens));
115 }
116 
PigweedDetokenizer(base::FlatHashMap<uint32_t,FormatString> tokens)117 PigweedDetokenizer::PigweedDetokenizer(
118     base::FlatHashMap<uint32_t, FormatString> tokens)
119     : tokens_(std::move(tokens)) {}
120 
121 #if defined(__GNUC__) || defined(__clang__)
122 #pragma GCC diagnostic push
123 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
124 #endif  // defined(__GNUC__) || defined(__clang__)
125 
Detokenize(const protozero::ConstBytes & bytes) const126 base::StatusOr<DetokenizedString> PigweedDetokenizer::Detokenize(
127     const protozero::ConstBytes& bytes) const {
128   if (bytes.size < sizeof(uint32_t)) {
129     return base::ErrStatus("Truncated Pigweed payload");
130   }
131 
132   const uint32_t token = ReadUint32(bytes.data);
133 
134   FormatString* format = tokens_.Find(token);
135   if (!format) {
136     return DetokenizedString(token,
137                              FormatString(std::string("Token not found")));
138   }
139 
140   const uint8_t* ptr = bytes.data + sizeof(uint32_t);
141 
142   std::vector<std::variant<int64_t, uint64_t, double>> args;
143   std::vector<std::string> args_formatted;
144   for (Arg arg : format->args()) {
145     char buffer[kFormatBufferSize];
146     const char* fmt = arg.format.c_str();
147     size_t formatted_size;
148 
149     if (arg.type == kFloat) {
150       if (ptr + sizeof(float) > bytes.data + bytes.size) {
151         return base::ErrStatus("Truncated Pigweed float");
152       }
153 
154       float value_float;
155       memcpy(&value_float, ptr, sizeof(value_float));
156       ptr += sizeof(value_float);
157       double value = static_cast<double>(value_float);
158       args.push_back(value);
159       formatted_size =
160           perfetto::base::SprintfTrunc(buffer, kFormatBufferSize, fmt, value);
161     } else {
162       uint64_t raw;
163       auto old_ptr = ptr;
164       ptr = protozero::proto_utils::ParseVarInt(ptr, bytes.data + bytes.size,
165                                                 &raw);
166       if (old_ptr == ptr) {
167         return base::ErrStatus("Truncated Pigweed varint");
168       }
169       // All Pigweed integers (including unsigned) are zigzag encoded.
170       int64_t value = ::protozero::proto_utils::ZigZagDecode(raw);
171       if (arg.type == kSignedInt) {
172         args.push_back(value);
173         formatted_size =
174             perfetto::base::SprintfTrunc(buffer, kFormatBufferSize, fmt, value);
175       } else {
176         uint64_t value_unsigned;
177         memcpy(&value_unsigned, &value, sizeof(value_unsigned));
178         if (arg.type == kUnsigned32) {
179           value_unsigned &= 0xFFFFFFFFu;
180         }
181         args.push_back(value_unsigned);
182         formatted_size = perfetto::base::SprintfTrunc(buffer, kFormatBufferSize,
183                                                       fmt, value_unsigned);
184       }
185     }
186     if (formatted_size == kFormatBufferSize - 1) {
187       return base::ErrStatus("Exceeded buffer size for number");
188     }
189     args_formatted.push_back(std::string(buffer, formatted_size));
190     if (ptr >= bytes.data + bytes.size) {
191       break;
192     }
193   }
194 
195   return DetokenizedString(token, *format, args, args_formatted);
196 }
197 
198 #if defined(__GNUC__) || defined(__clang__)
199 #pragma GCC diagnostic pop
200 #endif  // defined(__GNUC__) || defined(__clang__)
201 
DetokenizedString(const uint32_t token,FormatString format_string)202 DetokenizedString::DetokenizedString(const uint32_t token,
203                                      FormatString format_string)
204     : token_(token), format_string_(std::move(format_string)) {}
205 
DetokenizedString(const uint32_t token,FormatString format_string,std::vector<std::variant<int64_t,uint64_t,double>> args,std::vector<std::string> args_formatted)206 DetokenizedString::DetokenizedString(
207     const uint32_t token,
208     FormatString format_string,
209     std::vector<std::variant<int64_t, uint64_t, double>> args,
210     std::vector<std::string> args_formatted)
211     : token_(token),
212       format_string_(format_string),
213       args_(args),
214       args_formatted_(args_formatted) {}
215 
Format() const216 std::string DetokenizedString::Format() const {
217   const auto args = format_string_.args();
218   const auto fmt = format_string_.template_str();
219   if (args.size() == 0) {
220     return fmt;
221   }
222 
223   std::string result;
224 
225   result.append(fmt.substr(0, args[0].begin));
226 
227   for (size_t i = 0; i < args.size(); i++) {
228     result.append(args_formatted_[i]);
229     if (i < args.size() - 1) {
230       result.append(fmt.substr(args[i].end, args[i + 1].begin - args[i].end));
231     } else {
232       result.append(fmt.substr(args[i].end, fmt.size() - args[i].end));
233     }
234   }
235 
236   return result;
237 }
238 
// Returns the index of the first character at or after `ix` that is not a
// printf flag character ('-', '+', '#', ' ' or '0').
// `fmt` is taken by reference so the string is not copied on every call, and
// an `ix` at or beyond the end of the string is returned unchanged.
static size_t SkipFlags(const std::string& fmt, size_t ix) {
  const auto is_flag = [](char c) {
    return c == '-' || c == '+' || c == '#' || c == ' ' || c == '0';
  };
  while (ix < fmt.size() && is_flag(fmt[ix])) {
    ++ix;
  }
  return ix;
}
246 
// Skips a printf field width or precision starting at `ix`: either a single
// '*', or an optionally signed run of decimal digits. Returns the index of
// the first character after it (or `ix` itself if nothing matches).
// `fmt` is taken by reference so the string is not copied on every call.
static size_t SkipAsteriskOrInteger(const std::string& fmt, size_t ix) {
  if (ix >= fmt.size()) {
    return ix;
  }
  if (fmt[ix] == '*') {
    return ix + 1;
  }

  if (fmt[ix] == '-' || fmt[ix] == '+') {
    ++ix;
  }

  // std::isdigit has undefined behavior for negative arguments other than
  // EOF, so bytes >= 0x80 must be converted to unsigned char first.
  while (ix < fmt.size() &&
         std::isdigit(static_cast<unsigned char>(fmt[ix])) != 0) {
    ++ix;
  }
  return ix;
}
259 
// Reads the printf length modifier at `ix`, if any.
// Returns {'l','l'} or {'h','h'} for the two-character modifiers, {c, '\0'}
// for a single-character modifier (one of h, l, j, z, t, L) and
// {'\0', '\0'} when there is none or `ix` is at/after the end of `fmt`.
static std::array<char, 2> ReadLengthModifier(const std::string& fmt,
                                              size_t ix) {
  if (ix >= fmt.size()) {
    return {};
  }

  const char first = fmt[ix];
  if (first == 'l' || first == 'h') {
    // Check for ll or hh, taking care not to read past the end of the string.
    if (ix + 1 < fmt.size() && fmt[ix + 1] == first) {
      return {first, first};
    }
    return {first};
  }
  // strchr() also matches the literal's terminating '\0', so exclude it
  // explicitly.
  if (first != '\0' && std::strchr("jztL", first) != nullptr) {
    return {first};
  }
  return {};
}
270 
FormatString(std::string format)271 FormatString::FormatString(std::string format) : template_str_(format) {
272   size_t fmt_start = 0;
273   for (size_t i = 0; i < format.size(); i++) {
274     if (format[i] == '%') {
275       fmt_start = i;
276       i += 1;
277 
278       i = SkipFlags(format, i);
279 
280       // Field width.
281       i = SkipAsteriskOrInteger(format, i);
282 
283       // Precision.
284       if (format[i] == '.') {
285         i += 1;
286         i = SkipAsteriskOrInteger(format, i);
287       }
288 
289       // Length modifier
290       const std::array<char, 2> length = ReadLengthModifier(format, i);
291       i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
292 
293       const char spec = format[i];
294       const std::string arg_format =
295           format.substr(fmt_start, i - fmt_start + 1);
296       if (spec == 'c' || spec == 'd' || spec == 'i') {
297         args_.push_back(Arg{kSignedInt, arg_format, fmt_start, i + 1});
298       } else if (strchr("oxXup", spec) != nullptr) {
299         // Size matters for unsigned integers.
300         if (length[0] == 'j' || length[1] == 'l') {
301           args_.push_back(Arg{kUnsigned64, arg_format, fmt_start, i + 1});
302         } else {
303           args_.push_back(Arg{kUnsigned32, arg_format, fmt_start, i + 1});
304         }
305       } else if (strchr("fFeEaAgG", spec) != nullptr) {
306         args_.push_back(Arg{kFloat, arg_format, fmt_start, i + 1});
307       } else {
308         // Parsing failed.
309         // We ignore this silently for now.
310       }
311     }
312   }
313 }
314 
315 }  // namespace perfetto::trace_processor::pigweed
316