xref: /aosp_15_r20/external/flatbuffers/src/annotated_binary_text_gen.cpp (revision 890232f25432b36107d06881e0a25aaa6b473652)
1 #include "annotated_binary_text_gen.h"
2 
3 #include <sstream>
4 #include <string>
5 
6 #include "binary_annotator.h"
7 #include "flatbuffers/base.h"
8 #include "flatbuffers/util.h"
9 
10 namespace flatbuffers {
11 namespace {
12 
// Layout parameters shared by all of the text-generation helpers in this file.
struct OutputConfig {
  // Width the type column is padded to.
  size_t largest_type_string{ 10 };

  // Width the value column is padded to.
  size_t largest_value_string{ 20 };

  // Number of raw bytes printed on each output line; also the chunk size used
  // when the value text of an array region is split across lines.
  size_t max_bytes_per_line{ 8 };

  // Width passed to ToHex when printing offsets.
  size_t offset_max_char{ 4 };

  // Character written between the output columns.
  char delimiter{ '|' };
};
24 
ToString(const BinarySectionType type)25 static std::string ToString(const BinarySectionType type) {
26   switch (type) {
27     case BinarySectionType::Header: return "header";
28     case BinarySectionType::Table: return "table";
29     case BinarySectionType::RootTable: return "root_table";
30     case BinarySectionType::VTable: return "vtable";
31     case BinarySectionType::Struct: return "struct";
32     case BinarySectionType::String: return "string";
33     case BinarySectionType::Vector: return "vector";
34     case BinarySectionType::Unknown: return "unknown";
35     case BinarySectionType::Union: return "union";
36     case BinarySectionType::Padding: return "padding";
37     default: return "todo";
38   }
39 }
40 
IsOffset(const BinaryRegionType type)41 static bool IsOffset(const BinaryRegionType type) {
42   return type == BinaryRegionType::UOffset || type == BinaryRegionType::SOffset;
43 }
44 
// Converts a scalar to its decimal text form. Non floating-point types go
// through std::to_string; floating-point types are streamed so that they use
// the default stream formatting rather than std::to_string's fixed notation.
template<typename T> std::string ToString(T value) {
  if (!std::is_floating_point<T>::value) { return std::to_string(value); }

  std::stringstream formatter;
  formatter << value;
  return formatter.str();
}
54 
55 template<typename T>
ToValueString(const BinaryRegion & region,const uint8_t * binary)56 std::string ToValueString(const BinaryRegion &region, const uint8_t *binary) {
57   std::string s;
58   s += "0x";
59   const T val = ReadScalar<T>(binary + region.offset);
60   const uint64_t start_index = region.offset + region.length - 1;
61   for (uint64_t i = 0; i < region.length; ++i) {
62     s += ToHex(binary[start_index - i]);
63   }
64   s += " (";
65   s += ToString(val);
66   s += ")";
67   return s;
68 }
69 
70 template<>
ToValueString(const BinaryRegion & region,const uint8_t * binary)71 std::string ToValueString<std::string>(const BinaryRegion &region,
72                                        const uint8_t *binary) {
73   return std::string(reinterpret_cast<const char *>(binary + region.offset),
74                      static_cast<size_t>(region.array_length));
75 }
76 
ToValueString(const BinaryRegion & region,const uint8_t * binary,const OutputConfig & output_config)77 static std::string ToValueString(const BinaryRegion &region,
78                                  const uint8_t *binary,
79                                  const OutputConfig &output_config) {
80   std::string s;
81 
82   if (region.array_length) {
83     if (region.type == BinaryRegionType::Uint8 ||
84         region.type == BinaryRegionType::Unknown) {
85       // Interpet each value as a ASCII to aid debugging
86       for (uint64_t i = 0; i < region.array_length; ++i) {
87         const uint8_t c = *(binary + region.offset + i);
88         s += isprint(c) ? static_cast<char>(c & 0x7F) : '.';
89       }
90       return s;
91     } else if (region.type == BinaryRegionType::Char) {
92       // string value
93       return ToValueString<std::string>(region, binary);
94     }
95   }
96 
97   switch (region.type) {
98     case BinaryRegionType::Uint32:
99       return ToValueString<uint32_t>(region, binary);
100     case BinaryRegionType::Int32: return ToValueString<int32_t>(region, binary);
101     case BinaryRegionType::Uint16:
102       return ToValueString<uint16_t>(region, binary);
103     case BinaryRegionType::Int16: return ToValueString<int16_t>(region, binary);
104     case BinaryRegionType::Bool: return ToValueString<bool>(region, binary);
105     case BinaryRegionType::Uint8: return ToValueString<uint8_t>(region, binary);
106     case BinaryRegionType::Char: return ToValueString<char>(region, binary);
107     case BinaryRegionType::Byte:
108     case BinaryRegionType::Int8: return ToValueString<int8_t>(region, binary);
109     case BinaryRegionType::Int64: return ToValueString<int64_t>(region, binary);
110     case BinaryRegionType::Uint64:
111       return ToValueString<uint64_t>(region, binary);
112     case BinaryRegionType::Double: return ToValueString<double>(region, binary);
113     case BinaryRegionType::Float: return ToValueString<float>(region, binary);
114     case BinaryRegionType::UType: return ToValueString<uint8_t>(region, binary);
115 
116     // Handle Offsets separately, incase they add additional details.
117     case BinaryRegionType::UOffset:
118       s += ToValueString<uint32_t>(region, binary);
119       break;
120     case BinaryRegionType::SOffset:
121       s += ToValueString<int32_t>(region, binary);
122       break;
123     case BinaryRegionType::VOffset:
124       s += ToValueString<uint16_t>(region, binary);
125       break;
126 
127     default: break;
128   }
129   // If this is an offset type, include the calculated offset location in the
130   // value.
131   // TODO(dbaileychess): It might be nicer to put this in the comment field.
132   if (IsOffset(region.type)) {
133     s += " Loc: +0x";
134     s += ToHex(region.points_to_offset, output_config.offset_max_char);
135   }
136   return s;
137 }
138 
// Carries the not-yet-printed part of a value that is being written across
// several output lines.
struct DocContinuation {
  // Column at which the value text started on the first line; zero while no
  // continuation has been recorded.
  size_t value_start_column{ 0 };

  // The part of the value text that still has to be printed.
  std::string value;
};
146 
GenerateTypeString(const BinaryRegion & region)147 static std::string GenerateTypeString(const BinaryRegion &region) {
148   return ToString(region.type) +
149          ((region.array_length)
150               ? "[" + std::to_string(region.array_length) + "]"
151               : "");
152 }
153 
GenerateComment(const BinaryRegionComment & comment,const BinarySection &)154 static std::string GenerateComment(const BinaryRegionComment &comment,
155                                    const BinarySection &) {
156   std::string s;
157   switch (comment.type) {
158     case BinaryRegionCommentType::Unknown: s = "unknown"; break;
159     case BinaryRegionCommentType::SizePrefix: s = "size prefix"; break;
160     case BinaryRegionCommentType::RootTableOffset:
161       s = "offset to root table `" + comment.name + "`";
162       break;
163     // TODO(dbaileychess): make this lowercase to follow the convention.
164     case BinaryRegionCommentType::FileIdentifier: s = "File Identifier"; break;
165     case BinaryRegionCommentType::Padding: s = "padding"; break;
166     case BinaryRegionCommentType::VTableSize: s = "size of this vtable"; break;
167     case BinaryRegionCommentType::VTableRefferingTableLength:
168       s = "size of referring table";
169       break;
170     case BinaryRegionCommentType::VTableFieldOffset:
171       s = "offset to field `" + comment.name;
172       break;
173     case BinaryRegionCommentType::VTableUnknownFieldOffset:
174       s = "offset to unknown field (id: " + std::to_string(comment.index) + ")";
175       break;
176 
177     case BinaryRegionCommentType::TableVTableOffset:
178       s = "offset to vtable";
179       break;
180     case BinaryRegionCommentType::TableField:
181       s = "table field `" + comment.name;
182       break;
183     case BinaryRegionCommentType::TableUnknownField: s = "unknown field"; break;
184     case BinaryRegionCommentType::TableOffsetField:
185       s = "offset to field `" + comment.name + "`";
186       break;
187     case BinaryRegionCommentType::StructField:
188       s = "struct field `" + comment.name + "`";
189       break;
190     case BinaryRegionCommentType::ArrayField:
191       s = "array field `" + comment.name + "`[" +
192           std::to_string(comment.index) + "]";
193       break;
194     case BinaryRegionCommentType::StringLength: s = "length of string"; break;
195     case BinaryRegionCommentType::StringValue: s = "string literal"; break;
196     case BinaryRegionCommentType::StringTerminator:
197       s = "string terminator";
198       break;
199     case BinaryRegionCommentType::VectorLength:
200       s = "length of vector (# items)";
201       break;
202     case BinaryRegionCommentType::VectorValue:
203       s = "value[" + std::to_string(comment.index) + "]";
204       break;
205     case BinaryRegionCommentType::VectorTableValue:
206       s = "offset to table[" + std::to_string(comment.index) + "]";
207       break;
208     case BinaryRegionCommentType::VectorStringValue:
209       s = "offset to string[" + std::to_string(comment.index) + "]";
210       break;
211     case BinaryRegionCommentType::VectorUnionValue:
212       s = "offset to union[" + std::to_string(comment.index) + "]";
213       break;
214 
215     default: break;
216   }
217   if (!comment.default_value.empty()) { s += " " + comment.default_value; }
218 
219   switch (comment.status) {
220     case BinaryRegionStatus::OK: break;  // no-op
221     case BinaryRegionStatus::WARN: s = "WARN: " + s; break;
222     case BinaryRegionStatus::WARN_NO_REFERENCES:
223       s = "WARN: nothing refers to this section.";
224       break;
225     case BinaryRegionStatus::WARN_CORRUPTED_PADDING:
226       s = "WARN: could be corrupted padding region.";
227       break;
228     case BinaryRegionStatus::WARN_PADDING_LENGTH:
229       s = "WARN: padding is longer than expected.";
230       break;
231     case BinaryRegionStatus::ERROR: s = "ERROR: " + s; break;
232     case BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY:
233       s = "ERROR: " + s + ". Invalid offset, points outside the binary.";
234       break;
235     case BinaryRegionStatus::ERROR_INCOMPLETE_BINARY:
236       s = "ERROR: " + s + ". Incomplete binary, expected to read " +
237           comment.status_message + " bytes.";
238       break;
239     case BinaryRegionStatus::ERROR_LENGTH_TOO_LONG:
240       s = "ERROR: " + s + ". Longer than the binary.";
241       break;
242     case BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT:
243       s = "ERROR: " + s + ". Shorter than the minimum length: ";
244       break;
245     case BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT:
246       s = "ERROR: " + s + ". Required field is not present.";
247       break;
248     case BinaryRegionStatus::ERROR_INVALID_UNION_TYPE:
249       s = "ERROR: " + s + ". Invalid union type value.";
250       break;
251     case BinaryRegionStatus::ERROR_CYCLE_DETECTED:
252       s = "ERROR: " + s + ". Invalid offset, cycle detected.";
253       break;
254   }
255 
256   return s;
257 }
258 
GenerateDocumentation(const BinaryRegion & region,const BinarySection & section,const uint8_t * binary,DocContinuation & continuation,const OutputConfig & output_config)259 static std::string GenerateDocumentation(const BinaryRegion &region,
260                                          const BinarySection &section,
261                                          const uint8_t *binary,
262                                          DocContinuation &continuation,
263                                          const OutputConfig &output_config) {
264   std::string s;
265 
266   // Check if there is a doc continuation that should be prioritized.
267   if (continuation.value_start_column) {
268     s += std::string(continuation.value_start_column - 2, ' ');
269     s += output_config.delimiter;
270     s += " ";
271 
272     s += continuation.value.substr(0, output_config.max_bytes_per_line);
273     continuation.value = continuation.value.substr(
274         std::min(output_config.max_bytes_per_line, continuation.value.size()));
275     return s;
276   }
277 
278   {
279     std::stringstream ss;
280     ss << std::setw(output_config.largest_type_string) << std::left;
281     ss << GenerateTypeString(region);
282     s += ss.str();
283   }
284   s += " ";
285   s += output_config.delimiter;
286   s += " ";
287   if (region.array_length) {
288     // Record where the value is first being outputted.
289     continuation.value_start_column = s.size();
290 
291     // Get the full-length value, which we will chunk below.
292     const std::string value = ToValueString(region, binary, output_config);
293 
294     std::stringstream ss;
295     ss << std::setw(output_config.largest_value_string) << std::left;
296     ss << value.substr(0, output_config.max_bytes_per_line);
297     s += ss.str();
298 
299     continuation.value =
300         value.substr(std::min(output_config.max_bytes_per_line, value.size()));
301   } else {
302     std::stringstream ss;
303     ss << std::setw(output_config.largest_value_string) << std::left;
304     ss << ToValueString(region, binary, output_config);
305     s += ss.str();
306   }
307 
308   s += " ";
309   s += output_config.delimiter;
310   s += " ";
311   s += GenerateComment(region.comment, section);
312 
313   return s;
314 }
315 
GenerateRegion(const BinaryRegion & region,const BinarySection & section,const uint8_t * binary,const OutputConfig & output_config)316 static std::string GenerateRegion(const BinaryRegion &region,
317                                   const BinarySection &section,
318                                   const uint8_t *binary,
319                                   const OutputConfig &output_config) {
320   std::string s;
321   bool doc_generated = false;
322   DocContinuation doc_continuation;
323   for (uint64_t i = 0; i < region.length; ++i) {
324     if ((i % output_config.max_bytes_per_line) == 0) {
325       // Start a new line of output
326       s += '\n';
327       s += "  ";
328       s += "+0x";
329       s += ToHex(region.offset + i, output_config.offset_max_char);
330       s += " ";
331       s += output_config.delimiter;
332     }
333 
334     // Add each byte
335     s += " ";
336     s += ToHex(binary[region.offset + i]);
337 
338     // Check for end of line or end of region conditions.
339     if (((i + 1) % output_config.max_bytes_per_line == 0) ||
340         i + 1 == region.length) {
341       if (i + 1 == region.length) {
342         // We are out of bytes but haven't the kMaxBytesPerLine, so we need to
343         // zero those out to align everything globally.
344         for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
345              ++j) {
346           s += "   ";
347         }
348       }
349       s += " ";
350       s += output_config.delimiter;
351       // This is the end of the first line or its the last byte of the region,
352       // generate the end-of-line documentation.
353       if (!doc_generated) {
354         s += " ";
355         s += GenerateDocumentation(region, section, binary, doc_continuation,
356                                    output_config);
357 
358         // If we have a value in the doc continuation, that means the doc is
359         // being printed on multiple lines.
360         doc_generated = doc_continuation.value.empty();
361       }
362     }
363   }
364 
365   return s;
366 }
367 
GenerateSection(const BinarySection & section,const uint8_t * binary,const OutputConfig & output_config)368 static std::string GenerateSection(const BinarySection &section,
369                                    const uint8_t *binary,
370                                    const OutputConfig &output_config) {
371   std::string s;
372   s += "\n";
373   s += ToString(section.type);
374   if (!section.name.empty()) { s += " (" + section.name + ")"; }
375   s += ":";
376   for (const BinaryRegion &region : section.regions) {
377     s += GenerateRegion(region, section, binary, output_config);
378   }
379   return s;
380 }
381 }  // namespace
382 
Generate(const std::string & filename,const std::string & schema_filename)383 bool AnnotatedBinaryTextGenerator::Generate(
384     const std::string &filename, const std::string &schema_filename) {
385   OutputConfig output_config;
386   output_config.max_bytes_per_line = options_.max_bytes_per_line;
387 
388   // Given the length of the binary, we can calculate the maximum number of
389   // characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
390   // the max output).
391   output_config.offset_max_char =
392       binary_length_ > 0xFFFFFF
393           ? 8
394           : (binary_length_ > 0xFFFF ? 6 : (binary_length_ > 0xFF ? 4 : 2));
395 
396   // Find the largest type string of all the regions in this file, so we can
397   // align the output nicely.
398   output_config.largest_type_string = 0;
399   for (const auto &section : annotations_) {
400     for (const auto &region : section.second.regions) {
401       std::string s = GenerateTypeString(region);
402       if (s.size() > output_config.largest_type_string) {
403         output_config.largest_type_string = s.size();
404       }
405 
406       // Don't consider array regions, as they will be split to multiple lines.
407       if (!region.array_length) {
408         s = ToValueString(region, binary_, output_config);
409         if (s.size() > output_config.largest_value_string) {
410           output_config.largest_value_string = s.size();
411         }
412       }
413     }
414   }
415 
416   // Generate each of the binary sections
417   std::string s;
418 
419   s += "// Annotated Flatbuffer Binary\n";
420   s += "//\n";
421   s += "// Schema file: " + schema_filename + "\n";
422   s += "// Binary file: " + filename + "\n";
423 
424   for (const auto &section : annotations_) {
425     s += GenerateSection(section.second, binary_, output_config);
426     s += "\n";
427   }
428 
429   // Modify the output filename.
430   std::string output_filename = StripExtension(filename);
431   output_filename += options_.output_postfix;
432   output_filename +=
433       "." + (options_.output_extension.empty() ? GetExtension(filename)
434                                                : options_.output_extension);
435 
436   return SaveFile(output_filename.c_str(), s, false);
437 }
438 
439 }  // namespace flatbuffers
440