1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/descriptors.h"
18
19 #include <cstdint>
20 #include <optional>
21 #include <vector>
22
23 #include "perfetto/base/status.h"
24 #include "perfetto/ext/base/string_utils.h"
25 #include "perfetto/ext/base/string_view.h"
26 #include "perfetto/protozero/field.h"
27 #include "perfetto/protozero/message.h"
28 #include "perfetto/protozero/proto_decoder.h"
29 #include "perfetto/protozero/scattered_heap_buffer.h"
30 #include "protos/perfetto/common/descriptor.pbzero.h"
31 #include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
32 #include "src/trace_processor/util/status_macros.h"
33
34 namespace perfetto {
35 namespace trace_processor {
36 namespace {
CreateFieldFromDecoder(const protos::pbzero::FieldDescriptorProto::Decoder & f_decoder,bool is_extension)37 FieldDescriptor CreateFieldFromDecoder(
38 const protos::pbzero::FieldDescriptorProto::Decoder& f_decoder,
39 bool is_extension) {
40 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
41 std::string type_name =
42 f_decoder.has_type_name()
43 ? base::StringView(f_decoder.type_name()).ToStdString()
44 : "";
45 // TODO(lalitm): add support for enums here.
46 uint32_t type =
47 f_decoder.has_type()
48 ? static_cast<uint32_t>(f_decoder.type())
49 : static_cast<uint32_t>(FieldDescriptorProto::TYPE_MESSAGE);
50 protos::pbzero::FieldOptions::Decoder opt(f_decoder.options());
51 std::optional<std::string> default_value;
52 if (f_decoder.has_default_value()) {
53 default_value = f_decoder.default_value().ToStdString();
54 }
55 return FieldDescriptor(
56 base::StringView(f_decoder.name()).ToStdString(),
57 static_cast<uint32_t>(f_decoder.number()), type, std::move(type_name),
58 std::vector<uint8_t>(f_decoder.options().data,
59 f_decoder.options().data + f_decoder.options().size),
60 default_value, f_decoder.label() == FieldDescriptorProto::LABEL_REPEATED,
61 opt.packed(), is_extension);
62 }
63
CheckExtensionField(const ProtoDescriptor & proto_descriptor,const FieldDescriptor & field)64 base::Status CheckExtensionField(const ProtoDescriptor& proto_descriptor,
65 const FieldDescriptor& field) {
66 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
67 auto existing_field = proto_descriptor.FindFieldByTag(field.number());
68 if (existing_field) {
69 if (field.type() != existing_field->type()) {
70 return base::ErrStatus("Field %s is re-introduced with different type",
71 field.name().c_str());
72 }
73 if ((field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
74 field.type() == FieldDescriptorProto::TYPE_ENUM) &&
75 field.raw_type_name() != existing_field->raw_type_name()) {
76 return base::ErrStatus(
77 "Field %s is re-introduced with different type %s (was %s)",
78 field.name().c_str(), field.raw_type_name().c_str(),
79 existing_field->raw_type_name().c_str());
80 }
81 }
82 return base::OkStatus();
83 }
84
85 } // namespace
86
ResolveShortType(const std::string & parent_path,const std::string & short_type)87 std::optional<uint32_t> DescriptorPool::ResolveShortType(
88 const std::string& parent_path,
89 const std::string& short_type) {
90 PERFETTO_DCHECK(!short_type.empty());
91
92 std::string search_path = short_type[0] == '.'
93 ? parent_path + short_type
94 : parent_path + '.' + short_type;
95 auto opt_idx = FindDescriptorIdx(search_path);
96 if (opt_idx)
97 return opt_idx;
98
99 if (parent_path.empty())
100 return std::nullopt;
101
102 auto parent_dot_idx = parent_path.rfind('.');
103 auto parent_substr = parent_dot_idx == std::string::npos
104 ? ""
105 : parent_path.substr(0, parent_dot_idx);
106 return ResolveShortType(parent_substr, short_type);
107 }
108
AddExtensionField(const std::string & package_name,protozero::ConstBytes field_desc_proto)109 base::Status DescriptorPool::AddExtensionField(
110 const std::string& package_name,
111 protozero::ConstBytes field_desc_proto) {
112 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
113 FieldDescriptorProto::Decoder f_decoder(field_desc_proto);
114 auto field = CreateFieldFromDecoder(f_decoder, true);
115
116 std::string extendee_name = f_decoder.extendee().ToStdString();
117 if (extendee_name.empty()) {
118 return base::ErrStatus("Extendee name is empty");
119 }
120
121 if (extendee_name[0] != '.') {
122 // Only prepend if the extendee is not fully qualified
123 extendee_name = package_name + "." + extendee_name;
124 }
125 std::optional<uint32_t> extendee = FindDescriptorIdx(extendee_name);
126 if (!extendee.has_value()) {
127 return base::ErrStatus("Extendee does not exist %s", extendee_name.c_str());
128 }
129 ProtoDescriptor& extendee_desc = descriptors_[extendee.value()];
130 RETURN_IF_ERROR(CheckExtensionField(extendee_desc, field));
131 extendee_desc.AddField(field);
132 return base::OkStatus();
133 }
134
AddNestedProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,std::vector<ExtensionInfo> * extensions,bool merge_existing_messages)135 base::Status DescriptorPool::AddNestedProtoDescriptors(
136 const std::string& file_name,
137 const std::string& package_name,
138 std::optional<uint32_t> parent_idx,
139 protozero::ConstBytes descriptor_proto,
140 std::vector<ExtensionInfo>* extensions,
141 bool merge_existing_messages) {
142 protos::pbzero::DescriptorProto::Decoder decoder(descriptor_proto);
143
144 auto parent_name =
145 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
146 auto full_name =
147 parent_name + "." + base::StringView(decoder.name()).ToStdString();
148
149 auto idx = FindDescriptorIdx(full_name);
150 if (idx.has_value() && !merge_existing_messages) {
151 const auto& existing_descriptor = descriptors_[*idx];
152 return base::ErrStatus("%s: %s was already defined in file %s",
153 file_name.c_str(), full_name.c_str(),
154 existing_descriptor.file_name().c_str());
155 }
156 if (!idx.has_value()) {
157 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
158 ProtoDescriptor::Type::kMessage,
159 parent_idx);
160 idx = AddProtoDescriptor(std::move(proto_descriptor));
161 }
162 ProtoDescriptor& proto_descriptor = descriptors_[*idx];
163 if (proto_descriptor.type() != ProtoDescriptor::Type::kMessage) {
164 return base::ErrStatus("%s was enum, redefined as message",
165 full_name.c_str());
166 }
167
168 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
169 for (auto it = decoder.field(); it; ++it) {
170 FieldDescriptorProto::Decoder f_decoder(*it);
171 auto field = CreateFieldFromDecoder(f_decoder, /*is_extension=*/false);
172 RETURN_IF_ERROR(CheckExtensionField(proto_descriptor, field));
173 proto_descriptor.AddField(std::move(field));
174 }
175
176 for (auto it = decoder.enum_type(); it; ++it) {
177 RETURN_IF_ERROR(AddEnumProtoDescriptors(file_name, package_name, idx, *it,
178 merge_existing_messages));
179 }
180 for (auto it = decoder.nested_type(); it; ++it) {
181 RETURN_IF_ERROR(AddNestedProtoDescriptors(file_name, package_name, idx, *it,
182 extensions,
183 merge_existing_messages));
184 }
185 for (auto ext_it = decoder.extension(); ext_it; ++ext_it) {
186 extensions->emplace_back(package_name, *ext_it);
187 }
188 return base::OkStatus();
189 }
190
AddEnumProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,bool merge_existing_messages)191 base::Status DescriptorPool::AddEnumProtoDescriptors(
192 const std::string& file_name,
193 const std::string& package_name,
194 std::optional<uint32_t> parent_idx,
195 protozero::ConstBytes descriptor_proto,
196 bool merge_existing_messages) {
197 protos::pbzero::EnumDescriptorProto::Decoder decoder(descriptor_proto);
198
199 auto parent_name =
200 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
201 auto full_name =
202 parent_name + "." + base::StringView(decoder.name()).ToStdString();
203
204 auto prev_idx = FindDescriptorIdx(full_name);
205 if (prev_idx.has_value() && !merge_existing_messages) {
206 const auto& existing_descriptor = descriptors_[*prev_idx];
207 return base::ErrStatus("%s: %s was already defined in file %s",
208 file_name.c_str(), full_name.c_str(),
209 existing_descriptor.file_name().c_str());
210 }
211 if (!prev_idx.has_value()) {
212 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
213 ProtoDescriptor::Type::kEnum,
214 std::nullopt);
215 prev_idx = AddProtoDescriptor(std::move(proto_descriptor));
216 }
217 ProtoDescriptor& proto_descriptor = descriptors_[*prev_idx];
218 if (proto_descriptor.type() != ProtoDescriptor::Type::kEnum) {
219 return base::ErrStatus("%s was message, redefined as enum",
220 full_name.c_str());
221 }
222
223 for (auto it = decoder.value(); it; ++it) {
224 protos::pbzero::EnumValueDescriptorProto::Decoder enum_value(it->data(),
225 it->size());
226 proto_descriptor.AddEnumValue(enum_value.number(),
227 enum_value.name().ToStdString());
228 }
229
230 return base::OkStatus();
231 }
232
AddFromFileDescriptorSet(const uint8_t * file_descriptor_set_proto,size_t size,const std::vector<std::string> & skip_prefixes,bool merge_existing_messages)233 base::Status DescriptorPool::AddFromFileDescriptorSet(
234 const uint8_t* file_descriptor_set_proto,
235 size_t size,
236 const std::vector<std::string>& skip_prefixes,
237 bool merge_existing_messages) {
238 protos::pbzero::FileDescriptorSet::Decoder proto(file_descriptor_set_proto,
239 size);
240 std::vector<ExtensionInfo> extensions;
241 for (auto it = proto.file(); it; ++it) {
242 protos::pbzero::FileDescriptorProto::Decoder file(*it);
243 const std::string file_name = file.name().ToStdString();
244 if (base::StartsWithAny(file_name, skip_prefixes))
245 continue;
246 if (!merge_existing_messages &&
247 processed_files_.find(file_name) != processed_files_.end()) {
248 // This file has been loaded once already. Skip.
249 continue;
250 }
251 processed_files_.insert(file_name);
252 std::string package = "." + base::StringView(file.package()).ToStdString();
253 for (auto message_it = file.message_type(); message_it; ++message_it) {
254 RETURN_IF_ERROR(AddNestedProtoDescriptors(
255 file_name, package, std::nullopt, *message_it, &extensions,
256 merge_existing_messages));
257 }
258 for (auto enum_it = file.enum_type(); enum_it; ++enum_it) {
259 RETURN_IF_ERROR(AddEnumProtoDescriptors(
260 file_name, package, std::nullopt, *enum_it, merge_existing_messages));
261 }
262 for (auto ext_it = file.extension(); ext_it; ++ext_it) {
263 extensions.emplace_back(package, *ext_it);
264 }
265 }
266
267 // Second pass: Add extension fields to the real protos.
268 for (const auto& extension : extensions) {
269 RETURN_IF_ERROR(AddExtensionField(extension.first, extension.second));
270 }
271
272 // Third pass: resolve the types of all the fields.
273 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
274 for (ProtoDescriptor& descriptor : descriptors_) {
275 for (auto& entry : *descriptor.mutable_fields()) {
276 FieldDescriptor& field = entry.second;
277 bool needs_resolution =
278 field.resolved_type_name().empty() &&
279 (field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
280 field.type() == FieldDescriptorProto::TYPE_ENUM);
281 if (needs_resolution) {
282 auto opt_desc =
283 ResolveShortType(descriptor.full_name(), field.raw_type_name());
284 if (!opt_desc.has_value()) {
285 return base::ErrStatus(
286 "Unable to find short type %s in field inside message %s",
287 field.raw_type_name().c_str(), descriptor.full_name().c_str());
288 }
289 field.set_resolved_type_name(
290 descriptors_[opt_desc.value()].full_name());
291 }
292 }
293 }
294
295 // Fourth pass: resolve all "uninterpreted" options to real options.
296 for (ProtoDescriptor& descriptor : descriptors_) {
297 for (auto& entry : *descriptor.mutable_fields()) {
298 FieldDescriptor& field = entry.second;
299 if (field.options().empty()) {
300 continue;
301 }
302 ResolveUninterpretedOption(descriptor, field, *field.mutable_options());
303 }
304 }
305 return base::OkStatus();
306 }
307
ResolveUninterpretedOption(const ProtoDescriptor & proto_desc,const FieldDescriptor & field_desc,std::vector<uint8_t> & options)308 base::Status DescriptorPool::ResolveUninterpretedOption(
309 const ProtoDescriptor& proto_desc,
310 const FieldDescriptor& field_desc,
311 std::vector<uint8_t>& options) {
312 auto opt_idx = FindDescriptorIdx(".google.protobuf.FieldOptions");
313 if (!opt_idx) {
314 return base::ErrStatus("Unable to find field options for field %s in %s",
315 field_desc.name().c_str(),
316 proto_desc.full_name().c_str());
317 }
318 ProtoDescriptor& field_options_desc = descriptors_[*opt_idx];
319
320 protozero::ProtoDecoder decoder(field_desc.options().data(),
321 field_desc.options().size());
322 protozero::HeapBuffered<protozero::Message> field_options;
323 for (;;) {
324 const uint8_t* start = decoder.begin() + decoder.read_offset();
325 auto field = decoder.ReadField();
326 if (!field.valid()) {
327 break;
328 }
329 const uint8_t* end = decoder.begin() + decoder.read_offset();
330
331 if (field.id() !=
332 protos::pbzero::FieldOptions::kUninterpretedOptionFieldNumber) {
333 field_options->AppendRawProtoBytes(start,
334 static_cast<size_t>(end - start));
335 continue;
336 }
337
338 protos::pbzero::UninterpretedOption::Decoder unint(field.as_bytes());
339 auto it = unint.name();
340 if (!it) {
341 return base::ErrStatus(
342 "Option for field %s in message %s does not have a name",
343 field_desc.name().c_str(), proto_desc.full_name().c_str());
344 }
345 protos::pbzero::UninterpretedOption::NamePart::Decoder name_part(*it);
346 auto option_field_desc =
347 field_options_desc.FindFieldByName(name_part.name_part().ToStdString());
348
349 // It's not immediately clear how options with multiple names should
350 // be parsed. This likely requires digging into protobuf compiler
351 // source; given we don't have any examples of this in the codebase
352 // today, defer handling of this to when we may need it.
353 if (++it) {
354 return base::ErrStatus(
355 "Option for field %s in message %s has multiple name segments",
356 field_desc.name().c_str(), proto_desc.full_name().c_str());
357 }
358 if (unint.has_identifier_value()) {
359 field_options->AppendString(option_field_desc->number(),
360 unint.identifier_value().ToStdString());
361 } else if (unint.has_positive_int_value()) {
362 field_options->AppendVarInt(option_field_desc->number(),
363 unint.positive_int_value());
364 } else if (unint.has_negative_int_value()) {
365 field_options->AppendVarInt(option_field_desc->number(),
366 unint.negative_int_value());
367 } else if (unint.has_double_value()) {
368 field_options->AppendFixed(option_field_desc->number(),
369 unint.double_value());
370 } else if (unint.has_string_value()) {
371 field_options->AppendString(option_field_desc->number(),
372 unint.string_value().ToStdString());
373 } else if (unint.has_aggregate_value()) {
374 field_options->AppendString(option_field_desc->number(),
375 unint.aggregate_value().ToStdString());
376 } else {
377 return base::ErrStatus(
378 "Unknown field set in UninterpretedOption %s for field %s in message "
379 "%s",
380 option_field_desc->name().c_str(), field_desc.name().c_str(),
381 proto_desc.full_name().c_str());
382 }
383 }
384 if (decoder.bytes_left() > 0) {
385 return base::ErrStatus("Unexpected extra bytes when parsing option %zu",
386 decoder.bytes_left());
387 }
388 options = field_options.SerializeAsArray();
389 return base::OkStatus();
390 }
391
FindDescriptorIdx(const std::string & full_name) const392 std::optional<uint32_t> DescriptorPool::FindDescriptorIdx(
393 const std::string& full_name) const {
394 auto it = full_name_to_descriptor_index_.find(full_name);
395 if (it == full_name_to_descriptor_index_.end()) {
396 return std::nullopt;
397 }
398 return it->second;
399 }
400
SerializeAsDescriptorSet()401 std::vector<uint8_t> DescriptorPool::SerializeAsDescriptorSet() {
402 protozero::HeapBuffered<protos::pbzero::DescriptorSet> descs;
403 for (auto& desc : descriptors()) {
404 protos::pbzero::DescriptorProto* proto_descriptor =
405 descs->add_descriptors();
406 proto_descriptor->set_name(desc.full_name());
407 for (auto& entry : desc.fields()) {
408 auto& field = entry.second;
409 protos::pbzero::FieldDescriptorProto* field_descriptor =
410 proto_descriptor->add_field();
411 field_descriptor->set_name(field.name());
412 field_descriptor->set_number(static_cast<int32_t>(field.number()));
413 // We do not support required fields. They will show up as
414 // optional after serialization.
415 field_descriptor->set_label(
416 field.is_repeated()
417 ? protos::pbzero::FieldDescriptorProto::LABEL_REPEATED
418 : protos::pbzero::FieldDescriptorProto::LABEL_OPTIONAL);
419 field_descriptor->set_type_name(field.resolved_type_name());
420 field_descriptor->set_type(
421 static_cast<protos::pbzero::FieldDescriptorProto_Type>(field.type()));
422 }
423 }
424 return descs.SerializeAsArray();
425 }
426
AddProtoDescriptor(ProtoDescriptor descriptor)427 uint32_t DescriptorPool::AddProtoDescriptor(ProtoDescriptor descriptor) {
428 uint32_t idx = static_cast<uint32_t>(descriptors_.size());
429 full_name_to_descriptor_index_[descriptor.full_name()] = idx;
430 descriptors_.emplace_back(std::move(descriptor));
431 return idx;
432 }
433
ProtoDescriptor(std::string file_name,std::string package_name,std::string full_name,Type type,std::optional<uint32_t> parent_id)434 ProtoDescriptor::ProtoDescriptor(std::string file_name,
435 std::string package_name,
436 std::string full_name,
437 Type type,
438 std::optional<uint32_t> parent_id)
439 : file_name_(std::move(file_name)),
440 package_name_(std::move(package_name)),
441 full_name_(std::move(full_name)),
442 type_(type),
443 parent_id_(parent_id) {}
444
FieldDescriptor(std::string name,uint32_t number,uint32_t type,std::string raw_type_name,std::vector<uint8_t> options,std::optional<std::string> default_value,bool is_repeated,bool is_packed,bool is_extension)445 FieldDescriptor::FieldDescriptor(std::string name,
446 uint32_t number,
447 uint32_t type,
448 std::string raw_type_name,
449 std::vector<uint8_t> options,
450 std::optional<std::string> default_value,
451 bool is_repeated,
452 bool is_packed,
453 bool is_extension)
454 : name_(std::move(name)),
455 number_(number),
456 type_(type),
457 raw_type_name_(std::move(raw_type_name)),
458 options_(std::move(options)),
459 default_value_(std::move(default_value)),
460 is_repeated_(is_repeated),
461 is_packed_(is_packed),
462 is_extension_(is_extension) {}
463
464 } // namespace trace_processor
465 } // namespace perfetto
466