// xref: /aosp_15_r20/external/flatbuffers/src/binary_annotator.h (revision 890232f25432b36107d06881e0a25aaa6b473652)
/*
 * Copyright 2021 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18 #define FLATBUFFERS_BINARY_ANNOTATOR_H_
19 
#include <iomanip>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "flatbuffers/base.h"
#include "flatbuffers/reflection.h"
#include "flatbuffers/stl_emulation.h"
#include "flatbuffers/util.h"
28 
29 namespace flatbuffers {
30 
// The underlying datatype stored in a BinaryRegion. Every enumerator has an
// explicitly assigned value.
enum class BinaryRegionType {
  // The datatype is not known.
  Unknown = 0,

  // Offset types.
  UOffset = 1,
  SOffset = 2,
  VOffset = 3,

  // Single-byte types.
  Bool = 4,
  Byte = 5,
  Char = 6,
  Uint8 = 7,
  Int8 = 8,

  // Multi-byte integer types.
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,

  // Floating point types.
  Float = 15,
  Double = 16,

  // The type discriminator of a union.
  UType = 17,
};
51 
// Formats `i` as an uppercase hexadecimal string, left-padded with '0' up to
// `width` characters. Values that need more than `width` digits are not
// truncated. `width` defaults to sizeof(T).
//
// Character-like types (e.g. uint8_t) are streamed as characters rather than
// numbers; use the dedicated uint8_t overload for those.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream stream;
  // std::setw takes an int, so convert explicitly instead of relying on an
  // implicit narrowing conversion from size_t.
  stream << std::hex << std::uppercase << std::setfill('0')
         << std::setw(static_cast<int>(width)) << i;
  return stream.str();
}
59 
60 // Specialized version for uint8_t that don't work well with std::hex.
ToHex(uint8_t i)61 static inline std::string ToHex(uint8_t i) {
62   return ToHex(static_cast<int>(i), 2);
63 }
64 
// The status of an annotated region. Values are bucketed numerically: OK is
// 0, warnings start at 100 and errors start at 200.
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES = 101,
  WARN_CORRUPTED_PADDING = 102,
  WARN_PADDING_LENGTH = 103,

  // Errors.
  ERROR = 200,
  // An offset points outside the bounds of the binary.
  ERROR_OFFSET_OUT_OF_BINARY = 201,
  // N bytes were expected to be read, but fewer remain in the binary.
  ERROR_INCOMPLETE_BINARY = 202,
  // The length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG = 203,
  // The length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT = 204,
  // A field marked as required is not present in the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT = 205,
  // A realized union type is not within the bounds of its enum.
  ERROR_INVALID_UNION_TYPE = 206,
  // There is a cycle in the offsets.
  ERROR_CYCLE_DETECTED = 207,
};
87 
// Identifies what a BinaryRegion represents within the binary, so that a
// comment can be attached to it.
enum class BinaryRegionCommentType {
  Unknown = 0,

  SizePrefix = 1,
  // The offset to the root table.
  RootTableOffset = 2,
  // The optional 4-char file identifier.
  FileIdentifier = 3,
  // Generic 0-filled padding.
  Padding = 4,

  // The size of the vtable.
  VTableSize = 5,
  // The size of the referring table.
  VTableRefferingTableLength = 6,
  // An offset to a vtable field.
  VTableFieldOffset = 7,
  // An offset to an unknown vtable field.
  VTableUnknownFieldOffset = 8,

  // The vtable offset of a table.
  TableVTableOffset = 9,
  // An "inline" table field value.
  TableField = 10,
  // A table field that is unknown.
  TableUnknownField = 11,
  // A table field value that points to another section.
  TableOffsetField = 12,

  // A struct field value.
  StructField = 13,
  // An array field value.
  ArrayField = 14,

  // The length of the string.
  StringLength = 15,
  // The string contents.
  StringValue = 16,
  // The explicit string terminator.
  StringTerminator = 17,

  // The length of the vector (# of items).
  VectorLength = 18,
  // An "inline" value of a vector.
  VectorValue = 19,
  // A vector value that points to another section.
  VectorTableValue = 20,
  VectorStringValue = 21,
  VectorUnionValue = 22,
};
132 
133 struct BinaryRegionComment {
134   BinaryRegionStatus status = BinaryRegionStatus::OK;
135 
136   // If status is non OK, this may be filled in with additional details.
137   std::string status_message;
138 
139   BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
140 
141   std::string name;
142 
143   std::string default_value;
144 
145   size_t index = 0;
146 };
147 
148 struct BinaryRegion {
149   // Offset into the binary where this region begins.
150   uint64_t offset = 0;
151 
152   // The length of this region in bytes.
153   uint64_t length = 0;
154 
155   // The underlying datatype of this region
156   BinaryRegionType type = BinaryRegionType::Unknown;
157 
158   // If `type` is an array/vector, this is the number of those types this region
159   // encompasses.
160   uint64_t array_length = 0;
161 
162   // If the is an offset to some other region, this is what it points to. The
163   // offset is relative to overall binary, not to this region.
164   uint64_t points_to_offset = 0;
165 
166   // The comment on the region.
167   BinaryRegionComment comment;
168 };
169 
// The kind of logical grouping a BinarySection represents. Every enumerator
// has an explicitly assigned value.
enum class BinarySectionType {
  // The kind of section is not known.
  Unknown = 0,

  // Section kinds.
  Header = 1,
  Table = 2,
  RootTable = 3,
  VTable = 4,
  Struct = 5,
  String = 6,
  Vector = 7,
  Union = 8,
  Padding = 9,
};
182 
183 // A section of the binary that is grouped together in some logical manner, and
184 // often is pointed too by some other offset BinaryRegion. Sections include
185 // `tables`, `vtables`, `strings`, `vectors`, etc..
186 struct BinarySection {
187   // User-specified name of the section, if applicable.
188   std::string name;
189 
190   // The type of this section.
191   BinarySectionType type = BinarySectionType::Unknown;
192 
193   // The binary regions that make up this section, in order of their offsets.
194   std::vector<BinaryRegion> regions;
195 };
196 
GetRegionType(reflection::BaseType base_type)197 inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
198   switch (base_type) {
199     case reflection::UType: return BinaryRegionType::UType;
200     case reflection::Bool: return BinaryRegionType::Uint8;
201     case reflection::Byte: return BinaryRegionType::Uint8;
202     case reflection::UByte: return BinaryRegionType::Uint8;
203     case reflection::Short: return BinaryRegionType::Int16;
204     case reflection::UShort: return BinaryRegionType::Uint16;
205     case reflection::Int: return BinaryRegionType::Uint32;
206     case reflection::UInt: return BinaryRegionType::Uint32;
207     case reflection::Long: return BinaryRegionType::Int64;
208     case reflection::ULong: return BinaryRegionType::Uint64;
209     case reflection::Float: return BinaryRegionType::Float;
210     case reflection::Double: return BinaryRegionType::Double;
211     default: return BinaryRegionType::Unknown;
212   }
213 }
214 
ToString(const BinaryRegionType type)215 inline static std::string ToString(const BinaryRegionType type) {
216   switch (type) {
217     case BinaryRegionType::UOffset: return "UOffset32";
218     case BinaryRegionType::SOffset: return "SOffset32";
219     case BinaryRegionType::VOffset: return "VOffset16";
220     case BinaryRegionType::Bool: return "bool";
221     case BinaryRegionType::Char: return "char";
222     case BinaryRegionType::Byte: return "int8_t";
223     case BinaryRegionType::Uint8: return "uint8_t";
224     case BinaryRegionType::Uint16: return "uint16_t";
225     case BinaryRegionType::Uint32: return "uint32_t";
226     case BinaryRegionType::Uint64: return "uint64_t"; ;
227     case BinaryRegionType::Int8: return "int8_t";
228     case BinaryRegionType::Int16: return "int16_t";
229     case BinaryRegionType::Int32: return "int32_t";
230     case BinaryRegionType::Int64: return "int64_t";
231     case BinaryRegionType::Double: return "double";
232     case BinaryRegionType::Float: return "float";
233     case BinaryRegionType::UType: return "UType8";
234     case BinaryRegionType::Unknown: return "?uint8_t";
235     default: return "todo";
236   }
237 }
238 
239 class BinaryAnnotator {
240  public:
BinaryAnnotator(const uint8_t * const bfbs,const uint64_t bfbs_length,const uint8_t * const binary,const uint64_t binary_length)241   explicit BinaryAnnotator(const uint8_t *const bfbs,
242                            const uint64_t bfbs_length,
243                            const uint8_t *const binary,
244                            const uint64_t binary_length)
245       : bfbs_(bfbs),
246         bfbs_length_(bfbs_length),
247         schema_(reflection::GetSchema(bfbs)),
248         binary_(binary),
249         binary_length_(binary_length) {}
250 
251   std::map<uint64_t, BinarySection> Annotate();
252 
253  private:
254   struct VTable {
255     struct Entry {
256       const reflection::Field *field = nullptr;
257       uint16_t offset_from_table = 0;
258     };
259 
260     // Field ID -> {field def, offset from table}
261     std::map<uint16_t, Entry> fields;
262 
263     uint16_t vtable_size = 0;
264     uint16_t table_size = 0;
265   };
266 
267   uint64_t BuildHeader(uint64_t offset);
268 
269   void BuildVTable(uint64_t offset, const reflection::Object *table,
270                    uint64_t offset_of_referring_table);
271 
272   void BuildTable(uint64_t offset, const BinarySectionType type,
273                   const reflection::Object *table);
274 
275   uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> &regions,
276                        const reflection::Object *structure);
277 
278   void BuildString(uint64_t offset, const reflection::Object *table,
279                    const reflection::Field *field);
280 
281   void BuildVector(uint64_t offset, const reflection::Object *table,
282                    const reflection::Field *field, uint64_t parent_table_offset,
283                    const VTable &vtable);
284 
285   std::string BuildUnion(uint64_t offset, uint8_t realized_type,
286                          const reflection::Field *field);
287 
288   void FixMissingRegions();
289   void FixMissingSections();
290 
IsValidOffset(const uint64_t offset)291   inline bool IsValidOffset(const uint64_t offset) const {
292     return offset < binary_length_;
293   }
294 
295   // Determines if performing a GetScalar request for `T` at `offset` would read
296   // passed the end of the binary.
IsValidRead(const uint64_t offset)297   template<typename T> inline bool IsValidRead(const uint64_t offset) const {
298     return IsValidRead(offset, sizeof(T));
299   }
300 
IsValidRead(const uint64_t offset,const uint64_t length)301   inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
302     return length < binary_length_ && IsValidOffset(offset + length - 1);
303   }
304 
305   // Calculate the number of bytes remaining from the given offset. If offset is
306   // > binary_length, 0 is returned.
RemainingBytes(const uint64_t offset)307   uint64_t RemainingBytes(const uint64_t offset) const {
308     return IsValidOffset(offset) ? binary_length_ - offset : 0;
309   }
310 
311   template<typename T>
ReadScalar(const uint64_t offset)312   flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
313     if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
314 
315     return flatbuffers::ReadScalar<T>(binary_ + offset);
316   }
317 
318   // Adds the provided `section` keyed by the `offset` it occurs at. If a
319   // section is already added at that offset, it doesn't replace the exisiting
320   // one.
AddSection(const uint64_t offset,const BinarySection & section)321   void AddSection(const uint64_t offset, const BinarySection &section) {
322     sections_.insert(std::make_pair(offset, section));
323   }
324 
IsInlineField(const reflection::Field * const field)325   bool IsInlineField(const reflection::Field *const field) {
326     if (field->type()->base_type() == reflection::BaseType::Obj) {
327       return schema_->objects()->Get(field->type()->index())->is_struct();
328     }
329     return IsScalar(field->type()->base_type());
330   }
331 
IsUnionType(const reflection::BaseType type)332   bool IsUnionType(const reflection::BaseType type) {
333     return (type == reflection::BaseType::UType ||
334             type == reflection::BaseType::Union);
335   }
336 
IsUnionType(const reflection::Field * const field)337   bool IsUnionType(const reflection::Field *const field) {
338     return IsUnionType(field->type()->base_type()) &&
339            field->type()->index() >= 0;
340   }
341 
IsValidUnionValue(const reflection::Field * const field,const uint8_t value)342   bool IsValidUnionValue(const reflection::Field *const field,
343                          const uint8_t value) {
344     return IsUnionType(field) &&
345            IsValidUnionValue(field->type()->index(), value);
346   }
347 
IsValidUnionValue(const uint32_t enum_id,const uint8_t value)348   bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
349     if (enum_id >= schema_->enums()->size()) { return false; }
350 
351     const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
352 
353     if (enum_def == nullptr) { return false; }
354 
355     return value < enum_def->values()->size();
356   }
357 
GetElementSize(const reflection::Field * const field)358   uint64_t GetElementSize(const reflection::Field *const field) {
359     if (IsScalar(field->type()->element())) {
360       return GetTypeSize(field->type()->element());
361     }
362 
363     switch (field->type()->element()) {
364       case reflection::BaseType::Obj: {
365         auto obj = schema_->objects()->Get(field->type()->index());
366         return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
367       }
368       default: return sizeof(uint32_t);
369     }
370   }
371 
372   bool ContainsSection(const uint64_t offset);
373 
374   // The schema for the binary file
375   const uint8_t *bfbs_;
376   const uint64_t bfbs_length_;
377   const reflection::Schema *schema_;
378 
379   // The binary data itself.
380   const uint8_t *binary_;
381   const uint64_t binary_length_;
382 
383   // Map of binary offset to vtables, to dedupe vtables.
384   std::map<uint64_t, VTable> vtables_;
385 
386   // The annotated binary sections, index by their absolute offset.
387   std::map<uint64_t, BinarySection> sections_;
388 };
389 
390 }  // namespace flatbuffers
391 
392 #endif  // FLATBUFFERS_BINARY_ANNOTATOR_H_