xref: /aosp_15_r20/external/perfetto/src/trace_processor/util/descriptors.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/descriptors.h"
18 
19 #include <cstdint>
20 #include <optional>
21 #include <vector>
22 
23 #include "perfetto/base/status.h"
24 #include "perfetto/ext/base/string_utils.h"
25 #include "perfetto/ext/base/string_view.h"
26 #include "perfetto/protozero/field.h"
27 #include "perfetto/protozero/message.h"
28 #include "perfetto/protozero/proto_decoder.h"
29 #include "perfetto/protozero/scattered_heap_buffer.h"
30 #include "protos/perfetto/common/descriptor.pbzero.h"
31 #include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
32 #include "src/trace_processor/util/status_macros.h"
33 
34 namespace perfetto {
35 namespace trace_processor {
36 namespace {
CreateFieldFromDecoder(const protos::pbzero::FieldDescriptorProto::Decoder & f_decoder,bool is_extension)37 FieldDescriptor CreateFieldFromDecoder(
38     const protos::pbzero::FieldDescriptorProto::Decoder& f_decoder,
39     bool is_extension) {
40   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
41   std::string type_name =
42       f_decoder.has_type_name()
43           ? base::StringView(f_decoder.type_name()).ToStdString()
44           : "";
45   // TODO(lalitm): add support for enums here.
46   uint32_t type =
47       f_decoder.has_type()
48           ? static_cast<uint32_t>(f_decoder.type())
49           : static_cast<uint32_t>(FieldDescriptorProto::TYPE_MESSAGE);
50   protos::pbzero::FieldOptions::Decoder opt(f_decoder.options());
51   std::optional<std::string> default_value;
52   if (f_decoder.has_default_value()) {
53     default_value = f_decoder.default_value().ToStdString();
54   }
55   return FieldDescriptor(
56       base::StringView(f_decoder.name()).ToStdString(),
57       static_cast<uint32_t>(f_decoder.number()), type, std::move(type_name),
58       std::vector<uint8_t>(f_decoder.options().data,
59                            f_decoder.options().data + f_decoder.options().size),
60       default_value, f_decoder.label() == FieldDescriptorProto::LABEL_REPEATED,
61       opt.packed(), is_extension);
62 }
63 
CheckExtensionField(const ProtoDescriptor & proto_descriptor,const FieldDescriptor & field)64 base::Status CheckExtensionField(const ProtoDescriptor& proto_descriptor,
65                                  const FieldDescriptor& field) {
66   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
67   auto existing_field = proto_descriptor.FindFieldByTag(field.number());
68   if (existing_field) {
69     if (field.type() != existing_field->type()) {
70       return base::ErrStatus("Field %s is re-introduced with different type",
71                              field.name().c_str());
72     }
73     if ((field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
74          field.type() == FieldDescriptorProto::TYPE_ENUM) &&
75         field.raw_type_name() != existing_field->raw_type_name()) {
76       return base::ErrStatus(
77           "Field %s is re-introduced with different type %s (was %s)",
78           field.name().c_str(), field.raw_type_name().c_str(),
79           existing_field->raw_type_name().c_str());
80     }
81   }
82   return base::OkStatus();
83 }
84 
85 }  // namespace
86 
ResolveShortType(const std::string & parent_path,const std::string & short_type)87 std::optional<uint32_t> DescriptorPool::ResolveShortType(
88     const std::string& parent_path,
89     const std::string& short_type) {
90   PERFETTO_DCHECK(!short_type.empty());
91 
92   std::string search_path = short_type[0] == '.'
93                                 ? parent_path + short_type
94                                 : parent_path + '.' + short_type;
95   auto opt_idx = FindDescriptorIdx(search_path);
96   if (opt_idx)
97     return opt_idx;
98 
99   if (parent_path.empty())
100     return std::nullopt;
101 
102   auto parent_dot_idx = parent_path.rfind('.');
103   auto parent_substr = parent_dot_idx == std::string::npos
104                            ? ""
105                            : parent_path.substr(0, parent_dot_idx);
106   return ResolveShortType(parent_substr, short_type);
107 }
108 
AddExtensionField(const std::string & package_name,protozero::ConstBytes field_desc_proto)109 base::Status DescriptorPool::AddExtensionField(
110     const std::string& package_name,
111     protozero::ConstBytes field_desc_proto) {
112   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
113   FieldDescriptorProto::Decoder f_decoder(field_desc_proto);
114   auto field = CreateFieldFromDecoder(f_decoder, true);
115 
116   std::string extendee_name = f_decoder.extendee().ToStdString();
117   if (extendee_name.empty()) {
118     return base::ErrStatus("Extendee name is empty");
119   }
120 
121   if (extendee_name[0] != '.') {
122     // Only prepend if the extendee is not fully qualified
123     extendee_name = package_name + "." + extendee_name;
124   }
125   std::optional<uint32_t> extendee = FindDescriptorIdx(extendee_name);
126   if (!extendee.has_value()) {
127     return base::ErrStatus("Extendee does not exist %s", extendee_name.c_str());
128   }
129   ProtoDescriptor& extendee_desc = descriptors_[extendee.value()];
130   RETURN_IF_ERROR(CheckExtensionField(extendee_desc, field));
131   extendee_desc.AddField(field);
132   return base::OkStatus();
133 }
134 
AddNestedProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,std::vector<ExtensionInfo> * extensions,bool merge_existing_messages)135 base::Status DescriptorPool::AddNestedProtoDescriptors(
136     const std::string& file_name,
137     const std::string& package_name,
138     std::optional<uint32_t> parent_idx,
139     protozero::ConstBytes descriptor_proto,
140     std::vector<ExtensionInfo>* extensions,
141     bool merge_existing_messages) {
142   protos::pbzero::DescriptorProto::Decoder decoder(descriptor_proto);
143 
144   auto parent_name =
145       parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
146   auto full_name =
147       parent_name + "." + base::StringView(decoder.name()).ToStdString();
148 
149   auto idx = FindDescriptorIdx(full_name);
150   if (idx.has_value() && !merge_existing_messages) {
151     const auto& existing_descriptor = descriptors_[*idx];
152     return base::ErrStatus("%s: %s was already defined in file %s",
153                            file_name.c_str(), full_name.c_str(),
154                            existing_descriptor.file_name().c_str());
155   }
156   if (!idx.has_value()) {
157     ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
158                                      ProtoDescriptor::Type::kMessage,
159                                      parent_idx);
160     idx = AddProtoDescriptor(std::move(proto_descriptor));
161   }
162   ProtoDescriptor& proto_descriptor = descriptors_[*idx];
163   if (proto_descriptor.type() != ProtoDescriptor::Type::kMessage) {
164     return base::ErrStatus("%s was enum, redefined as message",
165                            full_name.c_str());
166   }
167 
168   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
169   for (auto it = decoder.field(); it; ++it) {
170     FieldDescriptorProto::Decoder f_decoder(*it);
171     auto field = CreateFieldFromDecoder(f_decoder, /*is_extension=*/false);
172     RETURN_IF_ERROR(CheckExtensionField(proto_descriptor, field));
173     proto_descriptor.AddField(std::move(field));
174   }
175 
176   for (auto it = decoder.enum_type(); it; ++it) {
177     RETURN_IF_ERROR(AddEnumProtoDescriptors(file_name, package_name, idx, *it,
178                                             merge_existing_messages));
179   }
180   for (auto it = decoder.nested_type(); it; ++it) {
181     RETURN_IF_ERROR(AddNestedProtoDescriptors(file_name, package_name, idx, *it,
182                                               extensions,
183                                               merge_existing_messages));
184   }
185   for (auto ext_it = decoder.extension(); ext_it; ++ext_it) {
186     extensions->emplace_back(package_name, *ext_it);
187   }
188   return base::OkStatus();
189 }
190 
AddEnumProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,bool merge_existing_messages)191 base::Status DescriptorPool::AddEnumProtoDescriptors(
192     const std::string& file_name,
193     const std::string& package_name,
194     std::optional<uint32_t> parent_idx,
195     protozero::ConstBytes descriptor_proto,
196     bool merge_existing_messages) {
197   protos::pbzero::EnumDescriptorProto::Decoder decoder(descriptor_proto);
198 
199   auto parent_name =
200       parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
201   auto full_name =
202       parent_name + "." + base::StringView(decoder.name()).ToStdString();
203 
204   auto prev_idx = FindDescriptorIdx(full_name);
205   if (prev_idx.has_value() && !merge_existing_messages) {
206     const auto& existing_descriptor = descriptors_[*prev_idx];
207     return base::ErrStatus("%s: %s was already defined in file %s",
208                            file_name.c_str(), full_name.c_str(),
209                            existing_descriptor.file_name().c_str());
210   }
211   if (!prev_idx.has_value()) {
212     ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
213                                      ProtoDescriptor::Type::kEnum,
214                                      std::nullopt);
215     prev_idx = AddProtoDescriptor(std::move(proto_descriptor));
216   }
217   ProtoDescriptor& proto_descriptor = descriptors_[*prev_idx];
218   if (proto_descriptor.type() != ProtoDescriptor::Type::kEnum) {
219     return base::ErrStatus("%s was message, redefined as enum",
220                            full_name.c_str());
221   }
222 
223   for (auto it = decoder.value(); it; ++it) {
224     protos::pbzero::EnumValueDescriptorProto::Decoder enum_value(it->data(),
225                                                                  it->size());
226     proto_descriptor.AddEnumValue(enum_value.number(),
227                                   enum_value.name().ToStdString());
228   }
229 
230   return base::OkStatus();
231 }
232 
AddFromFileDescriptorSet(const uint8_t * file_descriptor_set_proto,size_t size,const std::vector<std::string> & skip_prefixes,bool merge_existing_messages)233 base::Status DescriptorPool::AddFromFileDescriptorSet(
234     const uint8_t* file_descriptor_set_proto,
235     size_t size,
236     const std::vector<std::string>& skip_prefixes,
237     bool merge_existing_messages) {
238   protos::pbzero::FileDescriptorSet::Decoder proto(file_descriptor_set_proto,
239                                                    size);
240   std::vector<ExtensionInfo> extensions;
241   for (auto it = proto.file(); it; ++it) {
242     protos::pbzero::FileDescriptorProto::Decoder file(*it);
243     const std::string file_name = file.name().ToStdString();
244     if (base::StartsWithAny(file_name, skip_prefixes))
245       continue;
246     if (!merge_existing_messages &&
247         processed_files_.find(file_name) != processed_files_.end()) {
248       // This file has been loaded once already. Skip.
249       continue;
250     }
251     processed_files_.insert(file_name);
252     std::string package = "." + base::StringView(file.package()).ToStdString();
253     for (auto message_it = file.message_type(); message_it; ++message_it) {
254       RETURN_IF_ERROR(AddNestedProtoDescriptors(
255           file_name, package, std::nullopt, *message_it, &extensions,
256           merge_existing_messages));
257     }
258     for (auto enum_it = file.enum_type(); enum_it; ++enum_it) {
259       RETURN_IF_ERROR(AddEnumProtoDescriptors(
260           file_name, package, std::nullopt, *enum_it, merge_existing_messages));
261     }
262     for (auto ext_it = file.extension(); ext_it; ++ext_it) {
263       extensions.emplace_back(package, *ext_it);
264     }
265   }
266 
267   // Second pass: Add extension fields to the real protos.
268   for (const auto& extension : extensions) {
269     RETURN_IF_ERROR(AddExtensionField(extension.first, extension.second));
270   }
271 
272   // Third pass: resolve the types of all the fields.
273   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
274   for (ProtoDescriptor& descriptor : descriptors_) {
275     for (auto& entry : *descriptor.mutable_fields()) {
276       FieldDescriptor& field = entry.second;
277       bool needs_resolution =
278           field.resolved_type_name().empty() &&
279           (field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
280            field.type() == FieldDescriptorProto::TYPE_ENUM);
281       if (needs_resolution) {
282         auto opt_desc =
283             ResolveShortType(descriptor.full_name(), field.raw_type_name());
284         if (!opt_desc.has_value()) {
285           return base::ErrStatus(
286               "Unable to find short type %s in field inside message %s",
287               field.raw_type_name().c_str(), descriptor.full_name().c_str());
288         }
289         field.set_resolved_type_name(
290             descriptors_[opt_desc.value()].full_name());
291       }
292     }
293   }
294 
295   // Fourth pass: resolve all "uninterpreted" options to real options.
296   for (ProtoDescriptor& descriptor : descriptors_) {
297     for (auto& entry : *descriptor.mutable_fields()) {
298       FieldDescriptor& field = entry.second;
299       if (field.options().empty()) {
300         continue;
301       }
302       ResolveUninterpretedOption(descriptor, field, *field.mutable_options());
303     }
304   }
305   return base::OkStatus();
306 }
307 
ResolveUninterpretedOption(const ProtoDescriptor & proto_desc,const FieldDescriptor & field_desc,std::vector<uint8_t> & options)308 base::Status DescriptorPool::ResolveUninterpretedOption(
309     const ProtoDescriptor& proto_desc,
310     const FieldDescriptor& field_desc,
311     std::vector<uint8_t>& options) {
312   auto opt_idx = FindDescriptorIdx(".google.protobuf.FieldOptions");
313   if (!opt_idx) {
314     return base::ErrStatus("Unable to find field options for field %s in %s",
315                            field_desc.name().c_str(),
316                            proto_desc.full_name().c_str());
317   }
318   ProtoDescriptor& field_options_desc = descriptors_[*opt_idx];
319 
320   protozero::ProtoDecoder decoder(field_desc.options().data(),
321                                   field_desc.options().size());
322   protozero::HeapBuffered<protozero::Message> field_options;
323   for (;;) {
324     const uint8_t* start = decoder.begin() + decoder.read_offset();
325     auto field = decoder.ReadField();
326     if (!field.valid()) {
327       break;
328     }
329     const uint8_t* end = decoder.begin() + decoder.read_offset();
330 
331     if (field.id() !=
332         protos::pbzero::FieldOptions::kUninterpretedOptionFieldNumber) {
333       field_options->AppendRawProtoBytes(start,
334                                          static_cast<size_t>(end - start));
335       continue;
336     }
337 
338     protos::pbzero::UninterpretedOption::Decoder unint(field.as_bytes());
339     auto it = unint.name();
340     if (!it) {
341       return base::ErrStatus(
342           "Option for field %s in message %s does not have a name",
343           field_desc.name().c_str(), proto_desc.full_name().c_str());
344     }
345     protos::pbzero::UninterpretedOption::NamePart::Decoder name_part(*it);
346     auto option_field_desc =
347         field_options_desc.FindFieldByName(name_part.name_part().ToStdString());
348 
349     // It's not immediately clear how options with multiple names should
350     // be parsed. This likely requires digging into protobuf compiler
351     // source; given we don't have any examples of this in the codebase
352     // today, defer handling of this to when we may need it.
353     if (++it) {
354       return base::ErrStatus(
355           "Option for field %s in message %s has multiple name segments",
356           field_desc.name().c_str(), proto_desc.full_name().c_str());
357     }
358     if (unint.has_identifier_value()) {
359       field_options->AppendString(option_field_desc->number(),
360                                   unint.identifier_value().ToStdString());
361     } else if (unint.has_positive_int_value()) {
362       field_options->AppendVarInt(option_field_desc->number(),
363                                   unint.positive_int_value());
364     } else if (unint.has_negative_int_value()) {
365       field_options->AppendVarInt(option_field_desc->number(),
366                                   unint.negative_int_value());
367     } else if (unint.has_double_value()) {
368       field_options->AppendFixed(option_field_desc->number(),
369                                  unint.double_value());
370     } else if (unint.has_string_value()) {
371       field_options->AppendString(option_field_desc->number(),
372                                   unint.string_value().ToStdString());
373     } else if (unint.has_aggregate_value()) {
374       field_options->AppendString(option_field_desc->number(),
375                                   unint.aggregate_value().ToStdString());
376     } else {
377       return base::ErrStatus(
378           "Unknown field set in UninterpretedOption %s for field %s in message "
379           "%s",
380           option_field_desc->name().c_str(), field_desc.name().c_str(),
381           proto_desc.full_name().c_str());
382     }
383   }
384   if (decoder.bytes_left() > 0) {
385     return base::ErrStatus("Unexpected extra bytes when parsing option %zu",
386                            decoder.bytes_left());
387   }
388   options = field_options.SerializeAsArray();
389   return base::OkStatus();
390 }
391 
FindDescriptorIdx(const std::string & full_name) const392 std::optional<uint32_t> DescriptorPool::FindDescriptorIdx(
393     const std::string& full_name) const {
394   auto it = full_name_to_descriptor_index_.find(full_name);
395   if (it == full_name_to_descriptor_index_.end()) {
396     return std::nullopt;
397   }
398   return it->second;
399 }
400 
SerializeAsDescriptorSet()401 std::vector<uint8_t> DescriptorPool::SerializeAsDescriptorSet() {
402   protozero::HeapBuffered<protos::pbzero::DescriptorSet> descs;
403   for (auto& desc : descriptors()) {
404     protos::pbzero::DescriptorProto* proto_descriptor =
405         descs->add_descriptors();
406     proto_descriptor->set_name(desc.full_name());
407     for (auto& entry : desc.fields()) {
408       auto& field = entry.second;
409       protos::pbzero::FieldDescriptorProto* field_descriptor =
410           proto_descriptor->add_field();
411       field_descriptor->set_name(field.name());
412       field_descriptor->set_number(static_cast<int32_t>(field.number()));
413       // We do not support required fields. They will show up as
414       // optional after serialization.
415       field_descriptor->set_label(
416           field.is_repeated()
417               ? protos::pbzero::FieldDescriptorProto::LABEL_REPEATED
418               : protos::pbzero::FieldDescriptorProto::LABEL_OPTIONAL);
419       field_descriptor->set_type_name(field.resolved_type_name());
420       field_descriptor->set_type(
421           static_cast<protos::pbzero::FieldDescriptorProto_Type>(field.type()));
422     }
423   }
424   return descs.SerializeAsArray();
425 }
426 
AddProtoDescriptor(ProtoDescriptor descriptor)427 uint32_t DescriptorPool::AddProtoDescriptor(ProtoDescriptor descriptor) {
428   uint32_t idx = static_cast<uint32_t>(descriptors_.size());
429   full_name_to_descriptor_index_[descriptor.full_name()] = idx;
430   descriptors_.emplace_back(std::move(descriptor));
431   return idx;
432 }
433 
// Constructs a descriptor from its identifying attributes. The three string
// arguments are taken by value and moved into the corresponding members;
// |type| and |parent_id| are stored as given. No fields or enum values are
// added here.
ProtoDescriptor::ProtoDescriptor(std::string file_name,
                                 std::string package_name,
                                 std::string full_name,
                                 Type type,
                                 std::optional<uint32_t> parent_id)
    : file_name_(std::move(file_name)),
      package_name_(std::move(package_name)),
      full_name_(std::move(full_name)),
      type_(type),
      parent_id_(parent_id) {}
444 
// Constructs a field descriptor. |name|, |raw_type_name|, |options| and
// |default_value| are taken by value and moved into the corresponding
// members; the numeric and boolean arguments are stored as given.
FieldDescriptor::FieldDescriptor(std::string name,
                                 uint32_t number,
                                 uint32_t type,
                                 std::string raw_type_name,
                                 std::vector<uint8_t> options,
                                 std::optional<std::string> default_value,
                                 bool is_repeated,
                                 bool is_packed,
                                 bool is_extension)
    : name_(std::move(name)),
      number_(number),
      type_(type),
      raw_type_name_(std::move(raw_type_name)),
      options_(std::move(options)),
      default_value_(std::move(default_value)),
      is_repeated_(is_repeated),
      is_packed_(is_packed),
      is_extension_(is_extension) {}
463 
464 }  // namespace trace_processor
465 }  // namespace perfetto
466