xref: /aosp_15_r20/external/flatbuffers/src/binary_annotator.cpp (revision 890232f25432b36107d06881e0a25aaa6b473652)
1 #include "binary_annotator.h"
2 
3 #include <limits>
4 #include <string>
5 #include <vector>
6 
7 #include "flatbuffers/reflection.h"
8 #include "flatbuffers/verifier.h"
9 
10 namespace flatbuffers {
11 namespace {
12 
BinaryRegionSort(const BinaryRegion & a,const BinaryRegion & b)13 static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
14   return a.offset < b.offset;
15 }
16 
SetError(BinaryRegionComment & comment,BinaryRegionStatus status,std::string message="")17 static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
18                      std::string message = "") {
19   comment.status = status;
20   comment.status_message = message;
21 }
22 
MakeBinaryRegion(const uint64_t offset=0,const uint64_t length=0,const BinaryRegionType type=BinaryRegionType::Unknown,const uint64_t array_length=0,const uint64_t points_to_offset=0,const BinaryRegionComment comment={})23 static BinaryRegion MakeBinaryRegion(
24     const uint64_t offset = 0, const uint64_t length = 0,
25     const BinaryRegionType type = BinaryRegionType::Unknown,
26     const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
27     const BinaryRegionComment comment = {}) {
28   BinaryRegion region;
29   region.offset = offset;
30   region.length = length;
31   region.type = type;
32   region.array_length = array_length;
33   region.points_to_offset = points_to_offset;
34   region.comment = std::move(comment);
35   return region;
36 }
37 
MakeBinarySection(const std::string & name,const BinarySectionType type,const std::vector<BinaryRegion> regions)38 static BinarySection MakeBinarySection(
39     const std::string &name, const BinarySectionType type,
40     const std::vector<BinaryRegion> regions) {
41   BinarySection section;
42   section.name = name;
43   section.type = type;
44   section.regions = std::move(regions);
45   return section;
46 }
47 
MakeSingleRegionBinarySection(const std::string & name,const BinarySectionType type,const BinaryRegion & region)48 static BinarySection MakeSingleRegionBinarySection(const std::string &name,
49                                                    const BinarySectionType type,
50                                                    const BinaryRegion &region) {
51   std::vector<BinaryRegion> regions;
52   regions.push_back(region);
53   return MakeBinarySection(name, type, std::move(regions));
54 }
55 
// Returns true if any byte of `binary` in the half-open index range
// [offset, offset + length) is non-zero, and false otherwise (including when
// `length` is 0). No bounds checking is done here.
static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                            const uint8_t *const binary) {
  const uint64_t end = offset + length;
  uint64_t pos = offset;
  while (pos < end) {
    if (binary[pos] != 0) { return true; }
    ++pos;
  }
  return false;
}
63 
// Returns true if every byte of `binary` in the half-open index range
// [offset, offset + length) satisfies isprint(), and false as soon as one
// does not. An empty range yields true. No bounds checking is done here.
static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
  const uint64_t end = offset + length;
  for (uint64_t pos = offset; pos < end; ++pos) {
    const bool printable = isprint(binary[pos]) != 0;
    if (!printable) { return false; }
  }
  return true;
}
71 
GenerateMissingSection(const uint64_t offset,const uint64_t length,const uint8_t * const binary)72 static BinarySection GenerateMissingSection(const uint64_t offset,
73                                             const uint64_t length,
74                                             const uint8_t *const binary) {
75   std::vector<BinaryRegion> regions;
76 
77   // Check if the region is all zeros or not, as that can tell us if it is
78   // padding or not.
79   if (IsNonZeroRegion(offset, length, binary)) {
80     // Some of the padding bytes are non-zero, so this might be an unknown
81     // section of the binary.
82     // TODO(dbaileychess): We could be a bit smarter with different sized
83     // alignments. For now, the 8 byte check encompasses all the smaller
84     // alignments.
85     BinaryRegionComment comment;
86     comment.type = BinaryRegionCommentType::Unknown;
87     if (length >= 8) {
88       SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
89     } else {
90       SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
91     }
92 
93     regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
94                                        BinaryRegionType::Unknown, length, 0,
95                                        comment));
96 
97     return MakeBinarySection("no known references", BinarySectionType::Unknown,
98                              std::move(regions));
99   }
100 
101   BinaryRegionComment comment;
102   comment.type = BinaryRegionCommentType::Padding;
103   if (length >= 8) {
104     SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
105   }
106 
107   // This region is most likely padding.
108   regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
109                                      BinaryRegionType::Uint8, length, 0,
110                                      comment));
111 
112   return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
113 }
114 
115 }  // namespace
116 
Annotate()117 std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
118   flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
119   if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
120 
121   // The binary is too short to read as a flatbuffers.
122   // TODO(dbaileychess): We could spit out the annotated buffer sections, but
123   // I'm not sure if it is worth it.
124   if (binary_length_ < 4) { return {}; }
125 
126   // Make sure we start with a clean slate.
127   vtables_.clear();
128   sections_.clear();
129 
130   // First parse the header region which always start at offset 0.
131   // The returned offset will point to the root_table location.
132   const uint64_t root_table_offset = BuildHeader(0);
133 
134   if (IsValidOffset(root_table_offset)) {
135     // Build the root table, and all else will be referenced from it.
136     BuildTable(root_table_offset, BinarySectionType::RootTable,
137                schema_->root_table());
138   }
139 
140   // Now that all the sections are built, make sure the binary sections are
141   // contiguous.
142   FixMissingRegions();
143 
144   // Then scan the area between BinarySections insert padding sections that are
145   // implied.
146   FixMissingSections();
147 
148   return sections_;
149 }
150 
BuildHeader(const uint64_t header_offset)151 uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
152   const auto root_table_offset = ReadScalar<uint32_t>(header_offset);
153 
154   if (!root_table_offset.has_value()) {
155     // This shouldn't occur, since we validate the min size of the buffer
156     // before. But for completion sake, we shouldn't read passed the binary end.
157     return std::numeric_limits<uint64_t>::max();
158   }
159 
160   std::vector<BinaryRegion> regions;
161   uint64_t offset = header_offset;
162   // TODO(dbaileychess): sized prefixed value
163 
164   BinaryRegionComment root_offset_comment;
165   root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
166   root_offset_comment.name = schema_->root_table()->name()->str();
167 
168   if (!IsValidOffset(root_table_offset.value())) {
169     SetError(root_offset_comment,
170              BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
171   }
172 
173   regions.push_back(
174       MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
175                        root_table_offset.value(), root_offset_comment));
176   offset += sizeof(uint32_t);
177 
178   if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
179       IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
180     BinaryRegionComment comment;
181     comment.type = BinaryRegionCommentType::FileIdentifier;
182     // Check if the file identifier region has non-zero data, and assume its
183     // the file identifier. Otherwise, it will get filled in with padding
184     // later.
185     regions.push_back(MakeBinaryRegion(
186         offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
187         BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
188         comment));
189   }
190 
191   AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
192                                               std::move(regions)));
193 
194   return root_table_offset.value();
195 }
196 
BuildVTable(const uint64_t vtable_offset,const reflection::Object * const table,const uint64_t offset_of_referring_table)197 void BinaryAnnotator::BuildVTable(const uint64_t vtable_offset,
198                                   const reflection::Object *const table,
199                                   const uint64_t offset_of_referring_table) {
200   // First see if we have used this vtable before, if so skip building it again.
201   auto it = vtables_.find(vtable_offset);
202   if (it != vtables_.end()) { return; }
203 
204   if (ContainsSection(vtable_offset)) { return; }
205 
206   BinaryRegionComment vtable_size_comment;
207   vtable_size_comment.type = BinaryRegionCommentType::VTableSize;
208 
209   const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
210   if (!vtable_length.has_value()) {
211     const uint64_t remaining = RemainingBytes(vtable_offset);
212 
213     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
214              "2");
215 
216     AddSection(vtable_offset,
217                MakeSingleRegionBinarySection(
218                    table->name()->str(), BinarySectionType::VTable,
219                    MakeBinaryRegion(vtable_offset, remaining,
220                                     BinaryRegionType::Unknown, remaining, 0,
221                                     vtable_size_comment)));
222     return;
223   }
224 
225   // Vtables start with the size of the vtable
226   const uint16_t vtable_size = vtable_length.value();
227 
228   if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
229     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
230     // The vtable_size points to off the end of the binary.
231     AddSection(vtable_offset,
232                MakeSingleRegionBinarySection(
233                    table->name()->str(), BinarySectionType::VTable,
234                    MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
235                                     BinaryRegionType::Uint16, 0, 0,
236                                     vtable_size_comment)));
237 
238     return;
239   } else if (vtable_size < 2 * sizeof(uint16_t)) {
240     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
241              "4");
242     // The size includes itself and the table size which are both uint16_t.
243     AddSection(vtable_offset,
244                MakeSingleRegionBinarySection(
245                    table->name()->str(), BinarySectionType::VTable,
246                    MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
247                                     BinaryRegionType::Uint16, 0, 0,
248                                     vtable_size_comment)));
249     return;
250   }
251 
252   std::vector<BinaryRegion> regions;
253 
254   regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
255                                      BinaryRegionType::Uint16, 0, 0,
256                                      vtable_size_comment));
257   uint64_t offset = vtable_offset + sizeof(uint16_t);
258 
259   BinaryRegionComment ref_table_len_comment;
260   ref_table_len_comment.type =
261       BinaryRegionCommentType::VTableRefferingTableLength;
262 
263   // Ensure we can read the next uint16_t field, which is the size of the
264   // referring table.
265   const auto table_length = ReadScalar<uint16_t>(offset);
266 
267   if (!table_length.has_value()) {
268     const uint64_t remaining = RemainingBytes(offset);
269     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
270              "2");
271 
272     AddSection(offset, MakeSingleRegionBinarySection(
273                            table->name()->str(), BinarySectionType::VTable,
274                            MakeBinaryRegion(
275                                offset, remaining, BinaryRegionType::Unknown,
276                                remaining, 0, ref_table_len_comment)));
277     return;
278   }
279 
280   // Then they have the size of the table they reference.
281   const uint16_t table_size = table_length.value();
282 
283   if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
284     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
285   } else if (table_size < 4) {
286     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
287              "4");
288   }
289 
290   regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
291                                      BinaryRegionType::Uint16, 0, 0,
292                                      ref_table_len_comment));
293   offset += sizeof(uint16_t);
294 
295   const uint64_t offset_start = offset;
296 
297   // A mapping between field (and its id) to the relative offset (uin16_t) from
298   // the start of the table.
299   std::map<uint16_t, VTable::Entry> fields;
300 
301   // Counter for determining if the binary has more vtable entries than the
302   // schema provided. This can occur if the binary was created at a newer schema
303   // version and is being processed with an older one.
304   uint16_t fields_processed = 0;
305 
306   // Loop over all the fields.
307   ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
308     const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
309 
310     if (field_offset >= vtable_offset + vtable_size) {
311       // This field_offset is too large for this vtable, so it must come from a
312       // newer schema than the binary was create with or the binary writer did
313       // not write it. For either case, it is safe to ignore.
314 
315       // TODO(dbaileychess): We could show which fields are not set an their
316       // default values if we want. We just need a way to make it obvious that
317       // it isn't part of the buffer.
318       return;
319     }
320 
321     BinaryRegionComment field_comment;
322     field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
323     field_comment.name = std::string(field->name()->c_str()) +
324                          "` (id: " + std::to_string(field->id()) + ")";
325 
326     const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
327 
328     if (!offset_from_table.has_value()) {
329       const uint64_t remaining = RemainingBytes(field_offset);
330 
331       SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
332       regions.push_back(MakeBinaryRegion(field_offset, remaining,
333                                          BinaryRegionType::Unknown, remaining,
334                                          0, field_comment));
335 
336       return;
337     }
338 
339     if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
340                        1)) {
341       SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
342       regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
343                                          BinaryRegionType::VOffset, 0, 0,
344                                          field_comment));
345       return;
346     }
347 
348     VTable::Entry entry;
349     entry.field = field;
350     entry.offset_from_table = offset_from_table.value();
351     fields.insert(std::make_pair(field->id(), entry));
352 
353     std::string default_label;
354     if (offset_from_table.value() == 0) {
355       // Not present, so could be default or be optional.
356       if (field->required()) {
357         SetError(field_comment,
358                  BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
359         // If this is a required field, make it known this is an error.
360         regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
361                                            BinaryRegionType::VOffset, 0, 0,
362                                            field_comment));
363         return;
364       } else {
365         // Its an optional field, so get the default value and interpret and
366         // provided an annotation for it.
367         if (IsScalar(field->type()->base_type())) {
368           default_label += "<defaults to ";
369           default_label += IsFloat(field->type()->base_type())
370                                ? std::to_string(field->default_real())
371                                : std::to_string(field->default_integer());
372           default_label += "> (";
373         } else {
374           default_label += "<null> (";
375         }
376         default_label +=
377             reflection::EnumNameBaseType(field->type()->base_type());
378         default_label += ")";
379       }
380     }
381     field_comment.default_value = default_label;
382 
383     regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
384                                        BinaryRegionType::VOffset, 0, 0,
385                                        field_comment));
386 
387     fields_processed++;
388   });
389 
390   // Check if we covered all the expectant fields. If not, we need to add them
391   // as unknown fields.
392   uint16_t expectant_vtable_fields =
393       (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
394 
395   // Prevent a bad binary from declaring a really large vtable_size, that we can
396   // not indpendently verify.
397   expectant_vtable_fields = std::min(
398       static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);
399 
400   for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
401     const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
402 
403     const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
404 
405     BinaryRegionComment field_comment;
406     field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
407     field_comment.index = id;
408 
409     if (!offset_from_table.has_value()) {
410       const uint64_t remaining = RemainingBytes(field_offset);
411       SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
412       regions.push_back(MakeBinaryRegion(field_offset, remaining,
413                                          BinaryRegionType::Unknown, remaining,
414                                          0, field_comment));
415       continue;
416     }
417 
418     VTable::Entry entry;
419     entry.field = nullptr;  // No field to reference.
420     entry.offset_from_table = offset_from_table.value();
421     fields.insert(std::make_pair(id, entry));
422 
423     regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
424                                        BinaryRegionType::VOffset, 0, 0,
425                                        field_comment));
426   }
427 
428   sections_[vtable_offset] = MakeBinarySection(
429       table->name()->str(), BinarySectionType::VTable, std::move(regions));
430 
431   VTable vtable;
432   vtable.fields = std::move(fields);
433   vtable.table_size = table_size;
434   vtable.vtable_size = vtable_size;
435 
436   vtables_[vtable_offset] = vtable;
437 }
438 
BuildTable(const uint64_t table_offset,const BinarySectionType type,const reflection::Object * const table)439 void BinaryAnnotator::BuildTable(const uint64_t table_offset,
440                                  const BinarySectionType type,
441                                  const reflection::Object *const table) {
442   if (ContainsSection(table_offset)) { return; }
443 
444   BinaryRegionComment vtable_offset_comment;
445   vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;
446 
447   const auto vtable_soffset = ReadScalar<int32_t>(table_offset);
448 
449   if (!vtable_soffset.has_value()) {
450     const uint64_t remaining = RemainingBytes(table_offset);
451     SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
452              "4");
453 
454     AddSection(
455         table_offset,
456         MakeSingleRegionBinarySection(
457             table->name()->str(), type,
458             MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
459                              remaining, 0, vtable_offset_comment)));
460 
461     // If there aren't enough bytes left to read the vtable offset, there is
462     // nothing we can do.
463     return;
464   }
465 
466   // Tables start with the vtable
467   const uint64_t vtable_offset = table_offset - vtable_soffset.value();
468 
469   if (!IsValidOffset(vtable_offset)) {
470     SetError(vtable_offset_comment,
471              BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
472 
473     AddSection(table_offset,
474                MakeSingleRegionBinarySection(
475                    table->name()->str(), type,
476                    MakeBinaryRegion(table_offset, sizeof(int32_t),
477                                     BinaryRegionType::SOffset, 0, vtable_offset,
478                                     vtable_offset_comment)));
479 
480     // There isn't much to do with an invalid vtable offset, as we won't be able
481     // to intepret the rest of the table fields.
482     return;
483   }
484 
485   std::vector<BinaryRegion> regions;
486   regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
487                                      BinaryRegionType::SOffset, 0,
488                                      vtable_offset, vtable_offset_comment));
489 
490   // Parse the vtable first so we know what the rest of the fields in the table
491   // are.
492   BuildVTable(vtable_offset, table, table_offset);
493 
494   auto vtable_entry = vtables_.find(vtable_offset);
495   if (vtable_entry == vtables_.end()) {
496     // There is no valid vtable for this table, so we cannot process the rest of
497     // the table entries.
498     return;
499   }
500 
501   const VTable &vtable = vtable_entry->second;
502 
503   // This is the size and length of this table.
504   const uint16_t table_size = vtable.table_size;
505   uint64_t table_end_offset = table_offset + table_size;
506 
507   if (!IsValidOffset(table_end_offset - 1)) {
508     // We already validated the table size in BuildVTable, but we have to make
509     // sure we don't use a bad value here.
510     table_end_offset = binary_length_;
511   }
512 
513   // We need to iterate over the vtable fields by their offset in the binary,
514   // not by their IDs. So copy them over to another vector that we can sort on
515   // the offset_from_table property.
516   std::vector<VTable::Entry> fields;
517   for (const auto &vtable_field : vtable.fields) {
518     fields.push_back(vtable_field.second);
519   }
520 
521   std::stable_sort(fields.begin(), fields.end(),
522                    [](const VTable::Entry &a, const VTable::Entry &b) {
523                      return a.offset_from_table < b.offset_from_table;
524                    });
525 
526   // Iterate over all the fields by order of their offset.
527   for (size_t i = 0; i < fields.size(); ++i) {
528     const reflection::Field *field = fields[i].field;
529     const uint16_t offset_from_table = fields[i].offset_from_table;
530 
531     if (offset_from_table == 0) {
532       // Skip non-present fields.
533       continue;
534     }
535 
536     // The field offsets are relative to the start of the table.
537     const uint64_t field_offset = table_offset + offset_from_table;
538 
539     if (!IsValidOffset(field_offset)) {
540       // The field offset is larger than the binary, nothing we can do.
541       continue;
542     }
543 
544     // We have a vtable entry for a non-existant field, that means its a binary
545     // generated by a newer schema than we are currently processing.
546     if (field == nullptr) {
547       // Calculate the length of this unknown field.
548       const uint64_t unknown_field_length =
549           // Check if there is another unknown field after this one.
550           ((i + 1 < fields.size())
551                ? table_offset + fields[i + 1].offset_from_table
552                // Otherwise use the known end of the table.
553                : table_end_offset) -
554           field_offset;
555 
556       if (unknown_field_length == 0) { continue; }
557 
558       std::string hint;
559 
560       if (unknown_field_length == 4) {
561         const auto relative_offset = ReadScalar<uint32_t>(field_offset);
562         if (relative_offset.has_value()) {
563           // The field is 4 in length, so it could be an offset? Provide a hint.
564           hint += "<possibly an offset? Check Loc: +0x";
565           hint += ToHex(field_offset + relative_offset.value());
566           hint += ">";
567         }
568       }
569 
570       BinaryRegionComment unknown_field_comment;
571       unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;
572 
573       if (!IsValidRead(field_offset, unknown_field_length)) {
574         const uint64_t remaining = RemainingBytes(field_offset);
575 
576         SetError(unknown_field_comment,
577                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
578                  std::to_string(unknown_field_length));
579 
580         regions.push_back(MakeBinaryRegion(field_offset, remaining,
581                                            BinaryRegionType::Unknown, remaining,
582                                            0, unknown_field_comment));
583         continue;
584       }
585 
586       unknown_field_comment.default_value = hint;
587 
588       regions.push_back(MakeBinaryRegion(
589           field_offset, unknown_field_length, BinaryRegionType::Unknown,
590           unknown_field_length, 0, unknown_field_comment));
591       continue;
592     }
593 
594     if (IsScalar(field->type()->base_type())) {
595       // These are the raw values store in the table.
596       const uint64_t type_size = GetTypeSize(field->type()->base_type());
597       const BinaryRegionType region_type =
598           GetRegionType(field->type()->base_type());
599 
600       BinaryRegionComment scalar_field_comment;
601       scalar_field_comment.type = BinaryRegionCommentType::TableField;
602       scalar_field_comment.name =
603           std::string(field->name()->c_str()) + "` (" +
604           reflection::EnumNameBaseType(field->type()->base_type()) + ")";
605 
606       if (!IsValidRead(field_offset, type_size)) {
607         const uint64_t remaining = RemainingBytes(field_offset);
608         SetError(scalar_field_comment,
609                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
610                  std::to_string(type_size));
611 
612         regions.push_back(MakeBinaryRegion(field_offset, remaining,
613                                            BinaryRegionType::Unknown, remaining,
614                                            0, scalar_field_comment));
615         continue;
616       }
617 
618       if (IsUnionType(field)) {
619         // This is a type for a union. Validate the value
620         const auto enum_value = ReadScalar<uint8_t>(field_offset);
621 
622         // This should always have a value, due to the IsValidRead check above.
623         if (!IsValidUnionValue(field, enum_value.value())) {
624           SetError(scalar_field_comment,
625                    BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
626 
627           regions.push_back(MakeBinaryRegion(field_offset, type_size,
628                                              region_type, 0, 0,
629                                              scalar_field_comment));
630           continue;
631         }
632       }
633 
634       regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
635                                          0, 0, scalar_field_comment));
636       continue;
637     }
638 
639     // Read the offset
640     const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
641     uint64_t offset_of_next_item = 0;
642     BinaryRegionComment offset_field_comment;
643     offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
644     offset_field_comment.name = field->name()->c_str();
645     const std::string offset_prefix =
646         "offset to field `" + std::string(field->name()->c_str()) + "`";
647 
648     // Validate any field that isn't inline (i.e., non-structs).
649     if (!IsInlineField(field)) {
650       if (!offset_from_field.has_value()) {
651         const uint64_t remaining = RemainingBytes(field_offset);
652 
653         SetError(offset_field_comment,
654                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
655 
656         regions.push_back(MakeBinaryRegion(field_offset, remaining,
657                                            BinaryRegionType::Unknown, remaining,
658                                            0, offset_field_comment));
659         continue;
660       }
661 
662       offset_of_next_item = field_offset + offset_from_field.value();
663 
664       if (!IsValidOffset(offset_of_next_item)) {
665         SetError(offset_field_comment,
666                  BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
667         regions.push_back(MakeBinaryRegion(
668             field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
669             offset_of_next_item, offset_field_comment));
670         continue;
671       }
672     }
673 
674     switch (field->type()->base_type()) {
675       case reflection::BaseType::Obj: {
676         const reflection::Object *next_object =
677             schema_->objects()->Get(field->type()->index());
678 
679         if (next_object->is_struct()) {
680           // Structs are stored inline.
681           BuildStruct(field_offset, regions, next_object);
682         } else {
683           offset_field_comment.default_value = "(table)";
684 
685           regions.push_back(MakeBinaryRegion(
686               field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
687               offset_of_next_item, offset_field_comment));
688 
689           BuildTable(offset_of_next_item, BinarySectionType::Table,
690                      next_object);
691         }
692       } break;
693 
694       case reflection::BaseType::String: {
695         offset_field_comment.default_value = "(string)";
696         regions.push_back(MakeBinaryRegion(
697             field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
698             offset_of_next_item, offset_field_comment));
699         BuildString(offset_of_next_item, table, field);
700       } break;
701 
702       case reflection::BaseType::Vector: {
703         offset_field_comment.default_value = "(vector)";
704         regions.push_back(MakeBinaryRegion(
705             field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
706             offset_of_next_item, offset_field_comment));
707         BuildVector(offset_of_next_item, table, field, table_offset, vtable);
708       } break;
709 
710       case reflection::BaseType::Union: {
711         const uint64_t union_offset = offset_of_next_item;
712 
713         // The union type field is always one less than the union itself.
714         const uint16_t union_type_id = field->id() - 1;
715 
716         auto vtable_field = vtable.fields.find(union_type_id);
717         if (vtable_field == vtable.fields.end()) {
718           // TODO(dbaileychess): need to capture this error condition.
719           break;
720         }
721         offset_field_comment.default_value = "(union)";
722 
723         const uint64_t type_offset =
724             table_offset + vtable_field->second.offset_from_table;
725 
726         const auto realized_type = ReadScalar<uint8_t>(type_offset);
727         if (!realized_type.has_value()) {
728           const uint64_t remaining = RemainingBytes(type_offset);
729           SetError(offset_field_comment,
730                    BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
731           regions.push_back(MakeBinaryRegion(
732               type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
733               offset_field_comment));
734           continue;
735         }
736 
737         if (!IsValidUnionValue(field, realized_type.value())) {
738           // We already export an error in the union type field, so just skip
739           // building the union itself and it will default to an unreference
740           // Binary section.
741           continue;
742         }
743 
744         const std::string enum_type =
745             BuildUnion(union_offset, realized_type.value(), field);
746 
747         offset_field_comment.default_value =
748             "(union of type `" + enum_type + "`)";
749 
750         regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint32_t),
751                                            BinaryRegionType::UOffset, 0,
752                                            union_offset, offset_field_comment));
753 
754       } break;
755 
756       default: break;
757     }
758   }
759 
760   // Handle the case where there is padding after the last known binary
761   // region. Calculate where we left off towards the expected end of the
762   // table.
763   const uint64_t i = regions.back().offset + regions.back().length + 1;
764 
765   if (i < table_end_offset) {
766     const uint64_t pad_bytes = table_end_offset - i + 1;
767 
768     BinaryRegionComment padding_comment;
769     padding_comment.type = BinaryRegionCommentType::Padding;
770 
771     regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
772                                        BinaryRegionType::Uint8, pad_bytes, 0,
773                                        padding_comment));
774   }
775 
776   AddSection(table_offset,
777              MakeBinarySection(table->name()->str(), type, std::move(regions)));
778 }
779 
BuildStruct(const uint64_t struct_offset,std::vector<BinaryRegion> & regions,const reflection::Object * const object)780 uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
781                                       std::vector<BinaryRegion> &regions,
782                                       const reflection::Object *const object) {
783   if (!object->is_struct()) { return struct_offset; }
784   uint64_t offset = struct_offset;
785 
786   // Loop over all the fields in increasing order
787   ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
788     if (IsScalar(field->type()->base_type())) {
789       // Structure Field value
790       const uint64_t type_size = GetTypeSize(field->type()->base_type());
791       const BinaryRegionType region_type =
792           GetRegionType(field->type()->base_type());
793 
794       BinaryRegionComment comment;
795       comment.type = BinaryRegionCommentType::StructField;
796       comment.name =
797           std::string(object->name()->c_str()) + "." + field->name()->c_str();
798       comment.default_value = "(" +
799                               std::string(reflection::EnumNameBaseType(
800                                   field->type()->base_type())) +
801                               ")";
802 
803       if (!IsValidRead(offset, type_size)) {
804         const uint64_t remaining = RemainingBytes(offset);
805         SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
806                  std::to_string(type_size));
807         regions.push_back(MakeBinaryRegion(offset, remaining,
808                                            BinaryRegionType::Unknown, remaining,
809                                            0, comment));
810 
811         // TODO(dbaileychess): Should I bail out here? This sets offset to the
812         // end of the binary. So all other reads in the loop should fail.
813         offset += remaining;
814         return;
815       }
816 
817       regions.push_back(
818           MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
819       offset += type_size;
820     } else if (field->type()->base_type() == reflection::BaseType::Obj) {
821       // Structs are stored inline, even when nested.
822       offset = BuildStruct(offset, regions,
823                            schema_->objects()->Get(field->type()->index()));
824     } else if (field->type()->base_type() == reflection::BaseType::Array) {
825       const bool is_scalar = IsScalar(field->type()->element());
826       const uint64_t type_size = GetTypeSize(field->type()->element());
827       const BinaryRegionType region_type =
828           GetRegionType(field->type()->element());
829 
830       // Arrays are just repeated structures.
831       for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
832         if (is_scalar) {
833           BinaryRegionComment array_comment;
834           array_comment.type = BinaryRegionCommentType::ArrayField;
835           array_comment.name = std::string(object->name()->c_str()) + "." +
836                                field->name()->c_str();
837           array_comment.index = i;
838           array_comment.default_value =
839               "(" +
840               std::string(
841                   reflection::EnumNameBaseType(field->type()->element())) +
842               ")";
843 
844           if (!IsValidRead(offset, type_size)) {
845             const uint64_t remaining = RemainingBytes(offset);
846 
847             SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
848                      std::to_string(type_size));
849 
850             regions.push_back(MakeBinaryRegion(offset, remaining,
851                                                BinaryRegionType::Unknown,
852                                                remaining, 0, array_comment));
853 
854             // TODO(dbaileychess): Should I bail out here? This sets offset to
855             // the end of the binary. So all other reads in the loop should
856             // fail.
857             offset += remaining;
858             break;
859           }
860 
861           regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
862                                              0, array_comment));
863 
864           offset += type_size;
865         } else {
866           // Array of Structs.
867           //
868           // TODO(dbaileychess): This works, but the comments on the fields lose
869           // some context. Need to figure a way how to plumb the nested arrays
870           // comments together that isn't too confusing.
871           offset = BuildStruct(offset, regions,
872                                schema_->objects()->Get(field->type()->index()));
873         }
874       }
875     }
876 
877     // Insert any padding after this field.
878     const uint16_t padding = field->padding();
879     if (padding > 0 && IsValidOffset(offset + padding)) {
880       BinaryRegionComment padding_comment;
881       padding_comment.type = BinaryRegionCommentType::Padding;
882 
883       regions.push_back(MakeBinaryRegion(offset, padding,
884                                          BinaryRegionType::Uint8, padding, 0,
885                                          padding_comment));
886       offset += padding;
887     }
888   });
889 
890   return offset;
891 }
892 
BuildString(const uint64_t string_offset,const reflection::Object * const table,const reflection::Field * const field)893 void BinaryAnnotator::BuildString(const uint64_t string_offset,
894                                   const reflection::Object *const table,
895                                   const reflection::Field *const field) {
896   // Check if we have already generated this string section, and this is a
897   // shared string instance.
898   if (ContainsSection(string_offset)) { return; }
899 
900   std::vector<BinaryRegion> regions;
901   const auto string_length = ReadScalar<uint32_t>(string_offset);
902 
903   BinaryRegionComment string_length_comment;
904   string_length_comment.type = BinaryRegionCommentType::StringLength;
905 
906   if (!string_length.has_value()) {
907     const uint64_t remaining = RemainingBytes(string_offset);
908 
909     SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
910              "4");
911 
912     regions.push_back(MakeBinaryRegion(string_offset, remaining,
913                                        BinaryRegionType::Unknown, remaining, 0,
914                                        string_length_comment));
915 
916   } else {
917     const uint32_t string_size = string_length.value();
918     const uint64_t string_end =
919         string_offset + sizeof(uint32_t) + string_size + sizeof(char);
920 
921     if (!IsValidOffset(string_end - 1)) {
922       SetError(string_length_comment,
923                BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
924 
925       regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
926                                          BinaryRegionType::Uint32, 0, 0,
927                                          string_length_comment));
928     } else {
929       regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
930                                          BinaryRegionType::Uint32, 0, 0,
931                                          string_length_comment));
932 
933       BinaryRegionComment string_comment;
934       string_comment.type = BinaryRegionCommentType::StringValue;
935 
936       regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
937                                          string_size, BinaryRegionType::Char,
938                                          string_size, 0, string_comment));
939 
940       BinaryRegionComment string_terminator_comment;
941       string_terminator_comment.type =
942           BinaryRegionCommentType::StringTerminator;
943 
944       regions.push_back(MakeBinaryRegion(
945           string_offset + sizeof(uint32_t) + string_size, sizeof(char),
946           BinaryRegionType::Char, 0, 0, string_terminator_comment));
947     }
948   }
949 
950   AddSection(string_offset,
951              MakeBinarySection(std::string(table->name()->c_str()) + "." +
952                                    field->name()->c_str(),
953                                BinarySectionType::String, std::move(regions)));
954 }
955 
BuildVector(const uint64_t vector_offset,const reflection::Object * const table,const reflection::Field * const field,const uint64_t parent_table_offset,const VTable & vtable)956 void BinaryAnnotator::BuildVector(const uint64_t vector_offset,
957                                   const reflection::Object *const table,
958                                   const reflection::Field *const field,
959                                   const uint64_t parent_table_offset,
960                                   const VTable &vtable) {
961   if (ContainsSection(vector_offset)) { return; }
962 
963   BinaryRegionComment vector_length_comment;
964   vector_length_comment.type = BinaryRegionCommentType::VectorLength;
965 
966   const auto vector_length = ReadScalar<uint32_t>(vector_offset);
967   if (!vector_length.has_value()) {
968     const uint64_t remaining = RemainingBytes(vector_offset);
969     SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
970              "4");
971 
972     AddSection(
973         vector_offset,
974         MakeSingleRegionBinarySection(
975             std::string(table->name()->c_str()) + "." + field->name()->c_str(),
976             BinarySectionType::Vector,
977             MakeBinaryRegion(vector_offset, remaining,
978                              BinaryRegionType::Unknown, remaining, 0,
979                              vector_length_comment)));
980     return;
981   }
982 
983   // Validate there are enough bytes left in the binary to process all the
984   // items.
985   const uint64_t last_item_offset =
986       vector_offset + sizeof(uint32_t) +
987       vector_length.value() * GetElementSize(field);
988 
989   if (!IsValidOffset(last_item_offset - 1)) {
990     SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
991     AddSection(
992         vector_offset,
993         MakeSingleRegionBinarySection(
994             std::string(table->name()->c_str()) + "." + field->name()->c_str(),
995             BinarySectionType::Vector,
996             MakeBinaryRegion(vector_offset, sizeof(uint32_t),
997                              BinaryRegionType::Uint32, 0, 0,
998                              vector_length_comment)));
999 
1000     return;
1001   }
1002 
1003   std::vector<BinaryRegion> regions;
1004 
1005   regions.push_back(MakeBinaryRegion(vector_offset, sizeof(uint32_t),
1006                                      BinaryRegionType::Uint32, 0, 0,
1007                                      vector_length_comment));
1008 
1009   uint64_t offset = vector_offset + sizeof(uint32_t);
1010 
1011   switch (field->type()->element()) {
1012     case reflection::BaseType::Obj: {
1013       const reflection::Object *object =
1014           schema_->objects()->Get(field->type()->index());
1015 
1016       if (object->is_struct()) {
1017         // Vector of structs
1018         for (size_t i = 0; i < vector_length.value(); ++i) {
1019           // Structs are inline to the vector.
1020           const uint64_t next_offset = BuildStruct(offset, regions, object);
1021           if (next_offset == offset) { break; }
1022           offset = next_offset;
1023         }
1024       } else {
1025         // Vector of objects
1026         for (size_t i = 0; i < vector_length.value(); ++i) {
1027           BinaryRegionComment vector_object_comment;
1028           vector_object_comment.type =
1029               BinaryRegionCommentType::VectorTableValue;
1030           vector_object_comment.index = i;
1031 
1032           const auto table_relative_offset = ReadScalar<uint32_t>(offset);
1033           if (!table_relative_offset.has_value()) {
1034             const uint64_t remaining = RemainingBytes(offset);
1035             SetError(vector_object_comment,
1036                      BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1037 
1038             regions.push_back(
1039                 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1040                                  remaining, 0, vector_object_comment));
1041             break;
1042           }
1043 
1044           // The table offset is relative from the offset location itself.
1045           const uint64_t table_offset = offset + table_relative_offset.value();
1046 
1047           if (!IsValidOffset(table_offset)) {
1048             SetError(vector_object_comment,
1049                      BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1050             regions.push_back(MakeBinaryRegion(
1051                 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1052                 table_offset, vector_object_comment));
1053 
1054             offset += sizeof(uint32_t);
1055             continue;
1056           }
1057 
1058           if (table_offset == parent_table_offset) {
1059             SetError(vector_object_comment,
1060                      BinaryRegionStatus::ERROR_CYCLE_DETECTED);
1061             // A cycle detected where a table vector field is pointing to
1062             // itself. This should only happen in corrupted files.
1063             regions.push_back(MakeBinaryRegion(
1064                 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1065                 table_offset, vector_object_comment));
1066 
1067             offset += sizeof(uint32_t);
1068             continue;
1069           }
1070 
1071           regions.push_back(MakeBinaryRegion(
1072               offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1073               table_offset, vector_object_comment));
1074 
1075           offset += sizeof(uint32_t);
1076 
1077           BuildTable(table_offset, BinarySectionType::Table, object);
1078         }
1079       }
1080     } break;
1081     case reflection::BaseType::String: {
1082       // Vector of strings
1083       for (size_t i = 0; i < vector_length.value(); ++i) {
1084         BinaryRegionComment vector_object_comment;
1085         vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
1086         vector_object_comment.index = i;
1087 
1088         const auto string_relative_offset = ReadScalar<uint32_t>(offset);
1089         if (!string_relative_offset.has_value()) {
1090           const uint64_t remaining = RemainingBytes(offset);
1091 
1092           SetError(vector_object_comment,
1093                    BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1094 
1095           regions.push_back(
1096               MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1097                                remaining, 0, vector_object_comment));
1098           break;
1099         }
1100 
1101         // The string offset is relative from the offset location itself.
1102         const uint64_t string_offset = offset + string_relative_offset.value();
1103 
1104         if (!IsValidOffset(string_offset)) {
1105           SetError(vector_object_comment,
1106                    BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1107           regions.push_back(MakeBinaryRegion(
1108               offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1109               string_offset, vector_object_comment));
1110 
1111           offset += sizeof(uint32_t);
1112           continue;
1113         }
1114 
1115         regions.push_back(MakeBinaryRegion(
1116             offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1117             string_offset, vector_object_comment));
1118 
1119         BuildString(string_offset, table, field);
1120 
1121         offset += sizeof(uint32_t);
1122       }
1123     } break;
1124     case reflection::BaseType::Union: {
1125       // Vector of unions
1126       // Unions have both their realized type (uint8_t for now) that are
1127       // stored separately. These are stored in the field->index() - 1
1128       // location.
1129       const uint16_t union_type_vector_id = field->id() - 1;
1130 
1131       auto vtable_entry = vtable.fields.find(union_type_vector_id);
1132       if (vtable_entry == vtable.fields.end()) {
1133         // TODO(dbaileychess): need to capture this error condition.
1134         break;
1135       }
1136 
1137       const uint64_t union_type_vector_field_offset =
1138           parent_table_offset + vtable_entry->second.offset_from_table;
1139 
1140       const auto union_type_vector_field_relative_offset =
1141           ReadScalar<uint16_t>(union_type_vector_field_offset);
1142 
1143       if (!union_type_vector_field_relative_offset.has_value()) {
1144         const uint64_t remaining = RemainingBytes(offset);
1145         BinaryRegionComment vector_union_comment;
1146         vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
1147         SetError(vector_union_comment,
1148                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
1149 
1150         regions.push_back(MakeBinaryRegion(offset, remaining,
1151                                            BinaryRegionType::Unknown, remaining,
1152                                            0, vector_union_comment));
1153 
1154         break;
1155       }
1156 
1157       // Get the offset to the first type (the + sizeof(uint32_t) is to skip
1158       // over the vector length which we already know). Validation happens
1159       // within the loop below.
1160       const uint64_t union_type_vector_data_offset =
1161           union_type_vector_field_offset +
1162           union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
1163 
1164       for (size_t i = 0; i < vector_length.value(); ++i) {
1165         BinaryRegionComment comment;
1166         comment.type = BinaryRegionCommentType::VectorUnionValue;
1167         comment.index = i;
1168 
1169         const auto union_relative_offset = ReadScalar<uint32_t>(offset);
1170         if (!union_relative_offset.has_value()) {
1171           const uint64_t remaining = RemainingBytes(offset);
1172 
1173           SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1174 
1175           regions.push_back(MakeBinaryRegion(offset, remaining,
1176                                              BinaryRegionType::Unknown,
1177                                              remaining, 0, comment));
1178 
1179           break;
1180         }
1181 
1182         // The union offset is relative from the offset location itself.
1183         const uint64_t union_offset = offset + union_relative_offset.value();
1184 
1185         if (!IsValidOffset(union_offset)) {
1186           SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1187 
1188           regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1189                                              BinaryRegionType::UOffset, 0,
1190                                              union_offset, comment));
1191           continue;
1192         }
1193 
1194         const auto realized_type =
1195             ReadScalar<uint8_t>(union_type_vector_data_offset + i);
1196 
1197         if (!realized_type.has_value()) {
1198           SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
1199           regions.push_back(MakeBinaryRegion(
1200               offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
1201           continue;
1202         }
1203 
1204         if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
1205                                realized_type.value())) {
1206           // We already export an error in the union type field, so just skip
1207           // building the union itself and it will default to an unreference
1208           // Binary section.
1209           offset += sizeof(uint32_t);
1210           continue;
1211         }
1212 
1213         const std::string enum_type =
1214             BuildUnion(union_offset, realized_type.value(), field);
1215 
1216         comment.default_value = "(`" + enum_type + "`)";
1217         regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1218                                            BinaryRegionType::UOffset, 0,
1219                                            union_offset, comment));
1220 
1221         offset += sizeof(uint32_t);
1222       }
1223     } break;
1224     default: {
1225       if (IsScalar(field->type()->element())) {
1226         const BinaryRegionType binary_region_type =
1227             GetRegionType(field->type()->element());
1228 
1229         const uint64_t type_size = GetTypeSize(field->type()->element());
1230 
1231         // TODO(dbaileychess): It might be nicer to user the
1232         // BinaryRegion.array_length field to indicate this.
1233         for (size_t i = 0; i < vector_length.value(); ++i) {
1234           BinaryRegionComment vector_scalar_comment;
1235           vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
1236           vector_scalar_comment.index = i;
1237 
1238           if (!IsValidRead(offset, type_size)) {
1239             const uint64_t remaining = RemainingBytes(offset);
1240 
1241             SetError(vector_scalar_comment,
1242                      BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1243                      std::to_string(type_size));
1244 
1245             regions.push_back(
1246                 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1247                                  remaining, 0, vector_scalar_comment));
1248             break;
1249           }
1250 
1251           if (IsUnionType(field->type()->element())) {
1252             // This is a type for a union. Validate the value
1253             const auto enum_value = ReadScalar<uint8_t>(offset);
1254 
1255             // This should always have a value, due to the IsValidRead check
1256             // above.
1257             if (!IsValidUnionValue(field->type()->index(),
1258                                    enum_value.value())) {
1259               SetError(vector_scalar_comment,
1260                        BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
1261               regions.push_back(MakeBinaryRegion(offset, type_size,
1262                                                  binary_region_type, 0, 0,
1263                                                  vector_scalar_comment));
1264               offset += type_size;
1265               continue;
1266             }
1267           }
1268 
1269           regions.push_back(MakeBinaryRegion(offset, type_size,
1270                                              binary_region_type, 0, 0,
1271                                              vector_scalar_comment));
1272           offset += type_size;
1273         }
1274       }
1275     } break;
1276   }
1277   AddSection(vector_offset,
1278              MakeBinarySection(std::string(table->name()->c_str()) + "." +
1279                                    field->name()->c_str(),
1280                                BinarySectionType::Vector, std::move(regions)));
1281 }
1282 
BuildUnion(const uint64_t union_offset,const uint8_t realized_type,const reflection::Field * const field)1283 std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
1284                                         const uint8_t realized_type,
1285                                         const reflection::Field *const field) {
1286   const reflection::Enum *next_enum =
1287       schema_->enums()->Get(field->type()->index());
1288 
1289   const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
1290 
1291   if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
1292 
1293   const reflection::Type *union_type = enum_val->union_type();
1294 
1295   if (union_type->base_type() == reflection::BaseType::Obj) {
1296     const reflection::Object *object =
1297         schema_->objects()->Get(union_type->index());
1298 
1299     if (object->is_struct()) {
1300       // Union of vectors point to a new Binary section
1301       std::vector<BinaryRegion> regions;
1302 
1303       BuildStruct(union_offset, regions, object);
1304 
1305       AddSection(
1306           union_offset,
1307           MakeBinarySection(std::string(object->name()->c_str()) + "." +
1308                                 field->name()->c_str(),
1309                             BinarySectionType::Union, std::move(regions)));
1310     } else {
1311       BuildTable(union_offset, BinarySectionType::Table, object);
1312     }
1313   }
1314   // TODO(dbaileychess): handle the other union types.
1315 
1316   return enum_val->name()->c_str();
1317 }
1318 
FixMissingRegions()1319 void BinaryAnnotator::FixMissingRegions() {
1320   std::vector<BinaryRegion> regions_to_insert;
1321   for (auto &current_section : sections_) {
1322     BinarySection &section = current_section.second;
1323     if (section.regions.empty()) {
1324       // TODO(dbaileychess): is this possible?
1325       continue;
1326     }
1327 
1328     uint64_t offset = section.regions[0].offset + section.regions[0].length;
1329     for (size_t i = 1; i < section.regions.size(); ++i) {
1330       BinaryRegion &region = section.regions[i];
1331 
1332       const uint64_t next_offset = region.offset;
1333       if (!IsValidOffset(next_offset)) {
1334         // TODO(dbaileychess): figure out how we get into this situation.
1335         continue;
1336       }
1337 
1338       if (offset < next_offset) {
1339         const uint64_t padding_bytes = next_offset - offset;
1340 
1341         BinaryRegionComment comment;
1342         comment.type = BinaryRegionCommentType::Padding;
1343 
1344         if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
1345           SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
1346           regions_to_insert.push_back(
1347               MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
1348                                padding_bytes, 0, comment));
1349         } else {
1350           regions_to_insert.push_back(
1351               MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
1352                                padding_bytes, 0, comment));
1353         }
1354       }
1355       offset = next_offset + region.length;
1356     }
1357 
1358     if (!regions_to_insert.empty()) {
1359       section.regions.insert(section.regions.end(), regions_to_insert.begin(),
1360                              regions_to_insert.end());
1361       std::stable_sort(section.regions.begin(), section.regions.end(),
1362                        BinaryRegionSort);
1363       regions_to_insert.clear();
1364     }
1365   }
1366 }
1367 
FixMissingSections()1368 void BinaryAnnotator::FixMissingSections() {
1369   uint64_t offset = 0;
1370 
1371   std::vector<BinarySection> sections_to_insert;
1372 
1373   for (auto &current_section : sections_) {
1374     BinarySection &section = current_section.second;
1375     const uint64_t section_start_offset = current_section.first;
1376     const uint64_t section_end_offset =
1377         section.regions.back().offset + section.regions.back().length;
1378 
1379     if (offset < section_start_offset) {
1380       // We are at an offset that is less then the current section.
1381       const uint64_t pad_bytes = section_start_offset - offset + 1;
1382 
1383       sections_to_insert.push_back(
1384           GenerateMissingSection(offset - 1, pad_bytes, binary_));
1385     }
1386     offset = section_end_offset + 1;
1387   }
1388 
1389   // Handle the case where there are still bytes left in the binary that are
1390   // unaccounted for.
1391   if (offset < binary_length_) {
1392     const uint64_t pad_bytes = binary_length_ - offset + 1;
1393     sections_to_insert.push_back(
1394         GenerateMissingSection(offset - 1, pad_bytes, binary_));
1395   }
1396 
1397   for (const BinarySection &section_to_insert : sections_to_insert) {
1398     AddSection(section_to_insert.regions[0].offset, section_to_insert);
1399   }
1400 }
1401 
ContainsSection(const uint64_t offset)1402 bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
1403   auto it = sections_.lower_bound(offset);
1404   // If the section is found, check that it is exactly equal its offset.
1405   if (it != sections_.end() && it->first == offset) { return true; }
1406 
1407   // If this was the first section, there are no other previous sections to
1408   // check.
1409   if (it == sections_.begin()) { return false; }
1410 
1411   // Go back one section.
1412   --it;
1413 
1414   // And check that if the offset is covered by the section.
1415   return offset >= it->first && offset < it->second.regions.back().offset +
1416                                              it->second.regions.back().length;
1417 }
1418 
1419 }  // namespace flatbuffers