xref: /aosp_15_r20/external/emboss/runtime/cpp/emboss_text_util.h (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This header contains functionality related to Emboss text output.
16 #ifndef EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
17 #define EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
18 
19 #include <array>
20 #include <climits>
21 #include <cmath>
22 #include <cstdint>
23 #include <cstdio>
24 #include <cstring>
25 #include <limits>
26 #include <sstream>
27 #include <string>
28 #include <vector>
29 
30 #include "runtime/cpp/emboss_defines.h"
31 
32 namespace emboss {
33 
// TextOutputOptions are used to configure text output.  Typically, one can just
// use a default TextOutputOptions() (for compact output) or MultilineText()
// (for reasonable formatted output).
//
// Every setter-style method below is const: it leaves *this untouched and
// returns a modified copy, so calls can be chained, e.g.
// TextOutputOptions().Multiline(true).WithIndent("  ").
class TextOutputOptions final {
 public:
  TextOutputOptions() = default;

  // Returns a copy whose current indent is extended by one unit of indent().
  TextOutputOptions PlusOneIndent() const {
    TextOutputOptions result = *this;
    result.current_indent_ += indent_;
    return result;
  }

  // Returns a copy with multiline output switched on or off.
  TextOutputOptions Multiline(bool new_value) const {
    TextOutputOptions result = *this;
    result.multiline_ = new_value;
    return result;
  }

  // Returns a copy that uses new_value as the per-level indent string.  The
  // current indent of the copy is not recomputed.
  TextOutputOptions WithIndent(::std::string new_value) const {
    TextOutputOptions result = *this;
    result.indent_ = ::std::move(new_value);
    return result;
  }

  // Returns a copy with comment output switched on or off.
  TextOutputOptions WithComments(bool new_value) const {
    TextOutputOptions result = *this;
    result.comments_ = new_value;
    return result;
  }

  // Returns a copy with digit grouping switched on or off.
  TextOutputOptions WithDigitGrouping(bool new_value) const {
    TextOutputOptions result = *this;
    result.digit_grouping_ = new_value;
    return result;
  }

  // Returns a copy that uses new_value as the numeric base.  The parameter is
  // spelled ::std::uint8_t, matching the member and the getter; <cstdint> only
  // guarantees the name inside namespace std.
  TextOutputOptions WithNumericBase(::std::uint8_t new_value) const {
    TextOutputOptions result = *this;
    result.numeric_base_ = new_value;
    return result;
  }

  // Returns a copy with partial output allowed or disallowed.
  TextOutputOptions WithAllowPartialOutput(bool new_value) const {
    TextOutputOptions result = *this;
    result.allow_partial_output_ = new_value;
    return result;
  }

  // Accessors.  The string accessors return copies.
  ::std::string current_indent() const { return current_indent_; }
  ::std::string indent() const { return indent_; }
  bool multiline() const { return multiline_; }
  bool digit_grouping() const { return digit_grouping_; }
  bool comments() const { return comments_; }
  ::std::uint8_t numeric_base() const { return numeric_base_; }
  bool allow_partial_output() const { return allow_partial_output_; }

 private:
  // String appended to the current indent by each PlusOneIndent() call.
  ::std::string indent_;
  // Accumulated indent; empty until PlusOneIndent() is called.
  ::std::string current_indent_;
  bool comments_ = false;
  bool multiline_ = false;
  bool digit_grouping_ = false;
  bool allow_partial_output_ = false;
  ::std::uint8_t numeric_base_ = 10;
};
100 
101 namespace support {
102 
// TextOutputStream puts a stream-like interface onto a std::string, for use by
// DumpToTextStream.
//
// NOTE(review): the previous comment also said this class "is used by
// UpdateFromText()"; nothing in this class reads text, so that looks stale --
// confirm against the callers.
class TextOutputStream final {
 public:
  inline explicit TextOutputStream() = default;

  // Appends the full contents of text, including any embedded NUL bytes.
  inline void Write(const ::std::string &text) {
    text_.write(text.data(), static_cast< ::std::streamsize>(text.size()));
  }

  // Appends a NUL-terminated C string; text must not be null.
  inline void Write(const char *text) {
    text_.write(text, static_cast< ::std::streamsize>(::std::strlen(text)));
  }

  // Appends a single character.
  inline void Write(const char c) { text_.put(c); }

  // Returns a copy of everything written so far.
  inline ::std::string Result() const { return text_.str(); }

 private:
  ::std::ostringstream text_;
};
122 
// DecodeInteger decodes an integer from a string.  This is very similar to the
// many, many existing integer decode routines in the world, except that a) it
// accepts integers in any Emboss format, and b) it can run in environments that
// do not support std::istream or Google's number conversion routines.
//
// Accepted syntax: an optional '-' (signed IntType only), an optional "0x"/"0X"
// (hexadecimal) or "0b"/"0B" (binary) prefix, then digits of the selected base
// with optional '_' separators.  A '_' as the very first character of the text
// is rejected.
//
// Returns true and stores the value in *result on success.  Returns false, and
// leaves *result untouched, if the text is malformed, contains no digits, or
// the value does not fit in IntType.
//
// Ideally, this would be replaced by someone else's code.
template <class IntType>
bool DecodeInteger(const ::std::string &text, IntType *result) {
  IntType accumulator = 0;
  IntType base = 10;
  bool negative = false;
  bool saw_digit = false;
  unsigned offset = 0;
  if (::std::is_signed<IntType>::value && text.size() >= 1 + offset &&
      text[offset] == '-') {
    negative = true;
    offset += 1;
  }
  if (text.size() >= 2 + offset && text[offset] == '0') {
    if (text[offset + 1] == 'x' || text[offset + 1] == 'X') {
      base = 16;
      offset += 2;
    } else if (text[offset + 1] == 'b' || text[offset + 1] == 'B') {
      base = 2;
      offset += 2;
    }
  }
  // "", "0x", "0b", "-", "-0x", and "-0b" are not valid numbers.
  if (offset == text.size()) return false;
  for (; offset < text.size(); ++offset) {
    char c = text[offset];
    IntType digit = 0;
    if (c == '_') {
      if (offset == 0) {
        return false;
      }
      continue;
    } else if (c >= '0' && c <= '9') {
      digit = c - '0';
    } else if (c >= 'A' && c <= 'F') {
      digit = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'f') {
      digit = c - 'a' + 10;
    } else {
      return false;
    }
    if (digit >= base) {
      return false;
    }
    saw_digit = true;
    // The range checks are arranged so that the multiplication below can never
    // overflow: the accumulator is compared against the limit already divided
    // by the base.
    if (negative) {
      if (accumulator <
          (::std::numeric_limits<IntType>::min() + digit) / base) {
        return false;
      }
      accumulator = accumulator * base - digit;
    } else {
      if (accumulator >
          (::std::numeric_limits<IntType>::max() - digit) / base) {
        return false;
      }
      accumulator = accumulator * base + digit;
    }
  }
  // Separators alone do not make a number: "0x_", "-_", and "0b__" must be
  // rejected just like "0x", "-", and "0b".
  if (!saw_digit) return false;
  *result = accumulator;
  return true;
}
188 
// Consumes spaces, tabs, carriage returns, newlines, and '#'-to-end-of-line
// comments from the stream.
//
// Returns true if the end of the stream is reached while skipping.  Otherwise
// the first significant character is pushed back with Unread() and the result
// of that call is returned.
template <class Stream>
bool DiscardWhitespace(Stream *stream) {
  bool inside_comment = false;
  for (;;) {
    char ch;
    if (!stream->Read(&ch)) return true;
    const bool is_line_end = (ch == '\n' || ch == '\r');
    if (ch == '#') {
      inside_comment = true;
    } else if (is_line_end) {
      // A comment runs to the end of its line.
      inside_comment = false;
    }
    const bool is_blank = (ch == ' ' || ch == '\t' || is_line_end);
    if (!inside_comment && !is_blank) {
      return stream->Unread(ch);
    }
  }
}
200 
// Reads the next token from the stream into *token, after skipping leading
// whitespace and comments.
//
// A token is either a single punctuation character (one of ":{}[],") or a
// maximal run of characters up to, but not including, the next whitespace
// character, '#', or punctuation character.  At end of input, *token is set to
// the empty string and true is returned.
//
// Returns false, leaving *token unmodified, if whitespace skipping fails or
// the terminating character cannot be pushed back onto the stream.
//
// Note: strchr() also matches the string's terminating NUL, so a '\0'
// character in the input is treated like a punctuation character.
template <class Stream>
bool ReadToken(Stream *stream, ::std::string *token) {
  if (!DiscardWhitespace(stream)) return false;

  char next;
  if (!stream->Read(&next)) {
    // End of input: report an empty token.
    *token = "";
    return true;
  }

  const char *const punctuation = ":{}[],";
  const auto is_punctuation = [punctuation](char ch) {
    return strchr(punctuation, ch) != nullptr;
  };

  if (is_punctuation(next)) {
    *token = ::std::string(1, next);
    return true;
  }

  // TODO(bolms): Only allow alphanumeric characters here?
  ::std::string text;
  for (;;) {
    text.push_back(next);
    if (!stream->Read(&next)) break;  // End of input also ends the token.
    const bool ends_token = next == ' ' || next == '\t' || next == '\n' ||
                            next == '\r' || next == '#' ||
                            is_punctuation(next);
    if (ends_token) {
      // The terminator belongs to whatever follows; give it back.
      if (!stream->Unread(next)) return false;
      break;
    }
  }
  *token = text;
  return true;
}
230 
231 template <class Stream, class View>
ReadIntegerFromTextStream(View * view,Stream * stream)232 bool ReadIntegerFromTextStream(View *view, Stream *stream) {
233   ::std::string token;
234   if (!::emboss::support::ReadToken(stream, &token)) return false;
235   if (token.empty()) return false;
236   typename View::ValueType value;
237   if (!::emboss::support::DecodeInteger(token, &value)) return false;
238   return view->TryToWrite(value);
239 }
240 
241 // WriteIntegerToTextStream encodes the given value in base 2, 10, or 16, with
242 // or without digit group separators ('_'), and then calls stream->Write() with
243 // a char * argument that is a C-style null-terminated string of the encoded
244 // number.
245 //
246 // As with DecodeInteger, above, it would be nice to be able to replace this
247 // with someone else's code, but I (bolms@) was unable to find anything in
248 // standard C++ that would encode numbers in binary, nothing that would add
249 // digit separators to hex numbers, and nothing that would use '_' for digit
250 // separators.
251 template <class Stream, typename IntegralType>
WriteIntegerToTextStream(IntegralType value,Stream * stream,::std::uint8_t base,bool digit_grouping)252 void WriteIntegerToTextStream(IntegralType value, Stream *stream,
253                               ::std::uint8_t base, bool digit_grouping) {
254   static_assert(::std::numeric_limits<
255                     typename ::std::remove_cv<IntegralType>::type>::is_integer,
256                 "WriteIntegerToTextStream only supports integer types.");
257   static_assert(
258       !::std::is_same<bool,
259                       typename ::std::remove_cv<IntegralType>::type>::value,
260       "WriteIntegerToTextStream only supports integer types.");
261   EMBOSS_CHECK(base == 10 || base == 2 || base == 16);
262   const char *const digits = "0123456789abcdef";
263   const int grouping = base == 10 ? 3 : base == 16 ? 4 : 8;
264   // The maximum size 32-bit number is -2**31, which is:
265   //
266   // -0b10000000_00000000_00000000_00000000  (38 chars)
267   // -2_147_483_648  (14 chars)
268   // -0x8000_0000  (12 chars)
269   //
270   // Likewise, the maximum size 8-bit number is -128, which is:
271   // -0b10000000  (11 chars)
272   // -128  (4 chars)
273   // -0x80  (5 chars)
274   //
275   // Binary with separators is always the longest value: 9 chars per 8 bits,
276   // minus 1 char for the '_' that does not appear at the front of the number,
277   // plus 2 chars for "0b", plus 1 char for '-', plus 1 extra char for the
278   // trailing '\0', which is (sizeof value) * CHAR_BIT * 9 / 8 - 1 + 2 + 1 + 1.
279   const int buffer_size = (sizeof value) * CHAR_BIT * 9 / 8 + 3;
280   char buffer[buffer_size];
281   buffer[buffer_size - 1] = '\0';
282   int next_char = buffer_size - 2;
283   if (value == 0) {
284     EMBOSS_DCHECK_GE(next_char, 0);
285     buffer[next_char] = digits[0];
286     --next_char;
287   }
288   int sign = value < 0 ? -1 : 1;
289   int digit_count = 0;
290   auto buffer_char = [&](char c) {
291     EMBOSS_DCHECK_GE(next_char, 0);
292     buffer[next_char] = c;
293     --next_char;
294   };
295   if (value < 0) {
296     if (value == ::std::numeric_limits<decltype(value)>::lowest()) {
297       // The minimum negative two's-complement value has no corresponding
298       // positive value, so 'value = -value' is not useful in that case.
299       // Instead, we do some trickery to buffer the lowest-order digit here.
300       auto digit = -(value + 1) % base + 1;
301       value = -(value + 1) / base;
302       if (digit == base) {
303         digit = 0;
304         ++value;
305       }
306       buffer_char(digits[digit]);
307       ++digit_count;
308     } else {
309       value = -value;
310     }
311   }
312   while (value > 0) {
313     if (digit_count && digit_count % grouping == 0 && digit_grouping) {
314       buffer_char('_');
315     }
316     buffer_char(digits[value % base]);
317     value /= base;
318     ++digit_count;
319   }
320   if (base == 16) {
321     buffer_char('x');
322     buffer_char('0');
323   } else if (base == 2) {
324     buffer_char('b');
325     buffer_char('0');
326   }
327   if (sign < 0) {
328     buffer_char('-');
329   }
330 
331   stream->Write(buffer + 1 + next_char);
332 }
333 
334 // Writes an integer value in the base given in options, plus an optional
335 // comment with the same value in a second base.  This is used for the common
336 // output format of IntView, UIntView, and BcdView.
337 template <class Stream, class View>
WriteIntegerViewToTextStream(View * view,Stream * stream,const TextOutputOptions & options)338 void WriteIntegerViewToTextStream(View *view, Stream *stream,
339                                   const TextOutputOptions &options) {
340   WriteIntegerToTextStream(view->Read(), stream, options.numeric_base(),
341                            options.digit_grouping());
342   if (options.comments()) {
343     stream->Write("  # ");
344     WriteIntegerToTextStream(view->Read(), stream,
345                              options.numeric_base() == 10 ? 16 : 10,
346                              options.digit_grouping());
347   }
348 }
349 
350 template <class Stream, class View>
ReadBooleanFromTextStream(View * view,Stream * stream)351 bool ReadBooleanFromTextStream(View *view, Stream *stream) {
352   ::std::string token;
353   if (!::emboss::support::ReadToken(stream, &token)) return false;
354   if (token == "true") {
355     return view->TryToWrite(true);
356   } else if (token == "false") {
357     return view->TryToWrite(false);
358   }
359   // TODO(bolms): Provide a way to get an error message on parse failure.
360   return false;
361 }
362 
363 // The TextOutputOptions parameter is present so that it can be passed in by
364 // generated code that uses the same form for WriteBooleanViewToTextStream,
365 // WriteIntegerViewToTextStream, and WriteEnumViewToTextStream.
366 template <class Stream, class View>
WriteBooleanViewToTextStream(View * view,Stream * stream,const TextOutputOptions &)367 void WriteBooleanViewToTextStream(View *view, Stream *stream,
368                                   const TextOutputOptions &) {
369   if (view->Read()) {
370     stream->Write("true");
371   } else {
372     stream->Write("false");
373   }
374 }
375 
// FloatConstants holds various masks for working with IEEE754-compatible
// floating-point values at a bit level.  These are mostly used here to
// implement text format for NaNs, preserving the NaN payload so that the text
// format can (in theory) provide a bit-exact round-trip through the text
// format.
//
// Only the float and double specializations are defined.  Each provides:
//   MatchingIntegerType  unsigned integer with the same size as the float type
//   kMantissaMask()      mask of the mantissa (fraction) bits
//   kExponentMask()      mask of the exponent bits
//   kSignMask()          mask of the sign bit
//   kPrintfPrecision()   precision passed to printf's "%.*g" when writing
//   kScanfFormat()       scanf format that reads the value and then stores the
//                        number of characters consumed (%n)
template <class Float>
struct FloatConstants;

// 32-bit layout: 1 sign bit, 8 exponent bits, 23 mantissa bits.
template <>
struct FloatConstants<float> {
  static_assert(sizeof(float) == 4, "Emboss requires 32-bit float.");
  using MatchingIntegerType = ::std::uint32_t;
  static constexpr MatchingIntegerType kMantissaMask() {
    return (MatchingIntegerType{1} << 23) - 1;
  }
  static constexpr MatchingIntegerType kExponentMask() {
    return MatchingIntegerType{0xff} << 23;
  }
  static constexpr MatchingIntegerType kSignMask() {
    return MatchingIntegerType{1} << 31;
  }
  static constexpr int kPrintfPrecision() { return 9; }
  static constexpr const char *kScanfFormat() { return "%f%n"; }
};

// 64-bit layout: 1 sign bit, 11 exponent bits, 52 mantissa bits.
template <>
struct FloatConstants<double> {
  static_assert(sizeof(double) == 8, "Emboss requires 64-bit double.");
  using MatchingIntegerType = ::std::uint64_t;
  static constexpr MatchingIntegerType kMantissaMask() {
    return (MatchingIntegerType{1} << 52) - 1;
  }
  static constexpr MatchingIntegerType kExponentMask() {
    return MatchingIntegerType{0x7ff} << 52;
  }
  static constexpr MatchingIntegerType kSignMask() {
    return MatchingIntegerType{1} << 63;
  }
  static constexpr int kPrintfPrecision() { return 17; }
  static constexpr const char *kScanfFormat() { return "%lf%n"; }
};
411 
412 // Decodes a floating-point number from text.
413 template <class Float>
414 bool DecodeFloat(const ::std::string &token, Float *result) {
415   // The state of the world for reading floating-point values is somewhat better
416   // than the situation for writing them, but there are still a few bits that
417   // are underspecified.  This function is the mirror of WriteFloatToTextStream,
418   // below, so it specifically decodes infinities and NaNs in the formats that
419   // Emboss uses.
420   //
421   // Because of the use of scanf here, this function accepts hex floating-point
422   // values (0xh.hhhhpeee) *on some systems*.  TODO(bolms): make hex float
423   // support universal.
424 
425   using UInt = typename FloatConstants<Float>::MatchingIntegerType;
426 
427   if (token.empty()) return false;
428 
429   // First, check for negative.
430   bool negative = token[0] == '-';
431 
432   // Second, check for NaN.
433   ::std::size_t i = token[0] == '-' || token[0] == '+' ? 1 : 0;
434   if (token.size() >= i + 3 && (token[i] == 'N' || token[i] == 'n') &&
435       (token[i + 1] == 'A' || token[i + 1] == 'a') &&
436       (token[i + 2] == 'N' || token[i + 2] == 'n')) {
437     UInt nan_payload;
438     if (token.size() >= i + 4) {
439       if (token[i + 3] == '(' && token[token.size() - 1] == ')') {
440         if (!DecodeInteger(token.substr(i + 4, token.size() - i - 5),
441                            &nan_payload)) {
442           return false;
443         }
444       } else {
445         // NaN may not be followed by trailing characters other than a
446         // ()-enclosed payload.
447         return false;
448       }
449     } else {
450       // If no specific NaN was given, take a default NaN from the C++ standard
451       // library.  Technically, a conformant C++ implementation might not have
452       // quiet_NaN(), but any IEEE754-based implementation should.
453       //
454       // It is tempting to just write the default NaN directly into the view and
455       // return success, but "-NaN" should be have its sign bit set, and there
456       // is no direct way to set the sign bit of a NaN, so there are fewer code
457       // paths if we extract the default NaN payload, then use it in the
458       // reconstruction step, below.
459       Float default_nan = ::std::numeric_limits<Float>::quiet_NaN();
460       UInt bits;
461       ::std::memcpy(&bits, &default_nan, sizeof(bits));
462       nan_payload = bits & FloatConstants<Float>::kMantissaMask();
463     }
464     if (nan_payload == 0) {
465       // "NaN" with a payload of zero is actually the bit pattern for infinity;
466       // "NaN(0)" should not be an alias for "Inf".
467       return false;
468     }
469     if (nan_payload & (FloatConstants<Float>::kExponentMask() |
470                        FloatConstants<Float>::kSignMask())) {
471       // The payload must be small enough to fit in the payload space; it must
472       // not overflow into the exponent or sign bits.
473       //
474       // Note that the DecodeInteger call which decoded the payload will return
475       // false if the payload would overflow the `UInt` type, so cases like
476       // "NaN(0x10000000000000000000000000000)" -- which are so big that they no
477       // longer interfere with the sign or exponent -- are caught above.
478       return false;
479     }
480     UInt bits = FloatConstants<Float>::kExponentMask();
481     bits |= nan_payload;
482     if (negative) {
483       bits |= FloatConstants<Float>::kSignMask();
484     }
485     ::std::memcpy(result, &bits, sizeof(bits));
486     return true;
487   }
488 
489   // If the value is not NaN, check for infinity.
490   if (token.size() >= i + 3 && (token[i] == 'I' || token[i] == 'i') &&
491       (token[i + 1] == 'N' || token[i + 1] == 'n') &&
492       (token[i + 2] == 'F' || token[i + 2] == 'f')) {
493     if (token.size() > i + 3) {
494       // Infinity must be exactly "Inf" or "-Inf" (case insensitive).  There
495       // must not be trailing characters.
496       return false;
497     }
498     // As with quiet_NaN(), a conforming C++ implementation might not have
499     // infinity(), but an IEEE 754-based implementation should.
500     if (negative) {
501       *result = -::std::numeric_limits<Float>::infinity();
502       return true;
503     } else {
504       *result = ::std::numeric_limits<Float>::infinity();
505       return true;
506     }
507   }
508 
509   // For non-NaN, non-Inf values, use the C scanf function, mirroring the use of
510   // printf for writing the value, below.
511   int chars_used = -1;
512   if (::std::sscanf(token.c_str(), FloatConstants<Float>::kScanfFormat(),
513                     result, &chars_used) < 1) {
514     return false;
515   }
516   if (chars_used < 0 ||
517       static_cast</**/ ::std::size_t>(chars_used) < token.size()) {
518     return false;
519   }
520   return true;
521 }
522 
// Decodes a floating-point number from a text stream and writes it to the
// specified view.
//
// Returns false if no token can be read, the token does not decode as a
// View::ValueType via DecodeFloat, or the view rejects the value.
template <class Stream, class View>
bool ReadFloatFromTextStream(View *view, Stream *stream) {
  ::std::string text;
  if (!ReadToken(stream, &text)) return false;

  typename View::ValueType parsed;
  return DecodeFloat(text, &parsed) && view->TryToWrite(parsed);
}
533 
534 template <class Stream, class Float>
535 void WriteFloatToTextStream(Float n, Stream *stream,
536                             const TextOutputOptions &options) {
537   static_assert(::std::is_same<Float, float>::value ||
538                     ::std::is_same<Float, double>::value,
539                 "WriteFloatToTextStream can only write float or double.");
540   // The state of the world w.r.t. rendering floating-points as decimal text is,
541   // ca. 2018, less than ideal.
542   //
543   // In C++ land, there is actually no stable facility in the standard library
544   // until to_chars() in C++17 -- which is not actually implemented yet in
545   // libc++.  to_string(), the printf() family, and the iostreams system all
546   // respect the current locale.  In most programs, the locale is permanently
547   // left on "C", but this is not guaranteed.  to_string() also uses a fixed and
548   // rather unfortunate format.
549   //
550   // For integers, I (bolms@) chose to just implement custom read and write
551   // routines, but those routines are quite small and straightforward compared
552   // to floating point conversion.  Even writing correct output is difficult,
553   // and writing correct and minimal output is the subject of a number of
554   // academic papers.
555   //
556   // For the moment, I'm just using snprintf("%.*g", 17, n), which is guaranteed
557   // to be read back as the same number, but can be longer than strictly
558   // necessary.
559   //
560   // TODO(bolms): Import a modified version of the double-to-string conversion
561   // from Swift's standard library, which appears to be best implementation
562   // currently available.
563 
564   if (::std::isnan(n)) {
565     // The printf format for NaN is just "NaN".  In the interests of keeping
566     // things bit-exact, Emboss prints the exact NaN.
567     typename FloatConstants<Float>::MatchingIntegerType bits;
568     ::std::memcpy(&bits, &n, sizeof(bits));
569     ::std::uint64_t nan_payload = bits & FloatConstants<Float>::kMantissaMask();
570     ::std::uint64_t nan_sign = bits & FloatConstants<Float>::kSignMask();
571     if (nan_sign) {
572       // NaN still has a sign bit, which is generally treated differently from
573       // the payload.  There is no real "standard" text format for NaNs, but
574       // "-NaN" appears to be a common way of indicating a NaN with the sign bit
575       // set.
576       stream->Write("-NaN(");
577     } else {
578       stream->Write("NaN(");
579     }
580     // NaN payloads are always dumped in hex.  Note that Emboss is treating the
581     // is_quiet/is_signal bit as just another bit in the payload.
582     WriteIntegerToTextStream(nan_payload, stream, 16, options.digit_grouping());
583     stream->Write(")");
584     return;
585   }
586 
587   if (::std::isinf(n)) {
588     if (n < 0.0) {
589       stream->Write("-Inf");
590     } else {
591       stream->Write("Inf");
592     }
593     return;
594   }
595 
596   // TODO(bolms): Should the current numeric base be honored here?  Should there
597   // be a separate Float numeric base?
598   ::std::array<char, 30> buffer;
599   // TODO(bolms): Figure out how to get ::std::snprintf to work on
600   // microcontroller builds.
601   ::std::size_t snprintf_result = static_cast</**/ ::std::size_t>(::snprintf(
602       &(buffer[0]), buffer.size(), "%.*g",
603       FloatConstants<Float>::kPrintfPrecision(), static_cast<double>(n)));
604   (void)snprintf_result;  // Unused if EMBOSS_CHECK_LE is compiled out.
605   EMBOSS_CHECK_LE(snprintf_result, buffer.size());
606   stream->Write(&buffer[0]);
607 
608   // TODO(bolms): Support digit grouping.
609 }
610 
611 template <class Stream, class View>
612 bool ReadEnumViewFromTextStream(View *view, Stream *stream) {
613   ::std::string token;
614   if (!ReadToken(stream, &token)) return false;
615   if (token.empty()) return false;
616   if (::std::isdigit(token[0])) {
617     ::std::uint64_t value;
618     if (!DecodeInteger(token, &value)) return false;
619     // TODO(bolms): Fix the static_cast<ValueType> for signed ValueType.
620     // TODO(bolms): Should values between 2**63 and 2**64-1 actually be
621     // allowed in the text format when ValueType is signed?
622     return view->TryToWrite(static_cast<typename View::ValueType>(value));
623   } else if (token[0] == '-') {
624     ::std::int64_t value;
625     if (!DecodeInteger(token, &value)) return false;
626     return view->TryToWrite(static_cast<typename View::ValueType>(value));
627   } else {
628     typename View::ValueType value;
629     if (!TryToGetEnumFromName(token.c_str(), &value)) return false;
630     return view->TryToWrite(value);
631   }
632 }
633 
634 template <class Stream, class View>
635 void WriteEnumViewToTextStream(View *view, Stream *stream,
636                                const TextOutputOptions &options) {
637   const char *name = TryToGetNameFromEnum(view->Read());
638   if (name != nullptr) {
639     stream->Write(name);
640   }
641   // If the enum value has no known name, then write its numeric value
642   // instead.  If it does have a known name, and comments are enabled on the
643   // output, then write the numeric value as a comment.
644   if (name == nullptr || options.comments()) {
645     if (name != nullptr) stream->Write("  # ");
646     WriteIntegerToTextStream(
647         static_cast<
648             typename ::std::underlying_type<typename View::ValueType>::type>(
649             view->Read()),
650         stream, options.numeric_base(), options.digit_grouping());
651   }
652 }
653 
654 // Updates an array from a text stream.  For an array of integers, the most
655 // basic form of the text format looks like:
656 //
657 // { 0, 1, 2 }
658 //
659 // However, the following are all acceptable and equivalent:
660 //
661 // { 0, 1, 2, }
662 // {0 1 2}
663 // { [2]: 2, [1]: 1, [0]: 0 }
664 // {[2]:2, [0]:0, 1}
665 //
666 // Formally, the array must be contained within braces ("{}").  Elements are
667 // represented as an optional index surrounded by brackets ("[]") followed by
668 // the text format of the element, followed by a single optional comma (",").
669 // If no index is present for the first element, the index 0 will be used.  If
670 // no index is present for any elements after the first, the index one greater
671 // than the previous index will be used.
672 template <class Array, class Stream>
673 bool ReadArrayFromTextStream(Array *array, Stream *stream) {
674   // The text format allows any given index to be set more than once.  In
675   // theory, this function could track indices and fail if an index were
676   // double-set, but doing so would require quite a bit of overhead, and
677   // O(array->ElementCount()) extra space in the worst case.  It does not seem
678   // worth it to impose the runtime cost here.
679   ::std::size_t index = 0;
680   ::std::string brace;
681   // Read out the opening brace.
682   if (!ReadToken(stream, &brace)) return false;
683   if (brace != "{") return false;
684   for (;;) {
685     char c;
686     // Check for a closing brace; if present, success.
687     if (!DiscardWhitespace(stream)) return false;
688     if (!stream->Read(&c)) return false;
689     if (c == '}') return true;
690 
691     // If the element has an index, read it.
692     if (c == '[') {
693       ::std::string index_text;
694       if (!ReadToken(stream, &index_text)) return false;
695       if (!::emboss::support::DecodeInteger(index_text, &index)) return false;
696       ::std::string closing_bracket;
697       if (!ReadToken(stream, &closing_bracket)) return false;
698       if (closing_bracket != "]") return false;
699       ::std::string colon;
700       if (!ReadToken(stream, &colon)) return false;
701       if (colon != ":") return false;
702     } else {
703       if (!stream->Unread(c)) return false;
704     }
705 
706     // Read the element.
707     if (index >= array->ElementCount()) return false;
708     if (!(*array)[index].UpdateFromTextStream(stream)) return false;
709     ++index;
710 
711     // If there is a trailing comma, discard it.
712     if (!DiscardWhitespace(stream)) return false;
713     if (!stream->Read(&c)) return false;
714     if (c != ',') {
715       if (c != '}') return false;
716       if (!stream->Unread(c)) return false;
717     }
718   }
719 }
720 
721 // Prints out the elements of an 8-bit Int or UInt array as characters.
722 template <class Array, class Stream>
723 void WriteShorthandAsciiArrayCommentToTextStream(
724     const Array *array, Stream *stream, const TextOutputOptions &options) {
725   if (!options.multiline()) return;
726   if (!options.comments()) return;
727   if (array->ElementCount() == 0) return;
728   static constexpr int kCharsPerBlock = 64;
729   static constexpr char kStandInForNonPrintableChar = '.';
730   auto start_new_line = [&]() {
731     stream->Write("\n");
732     stream->Write(options.current_indent());
733     stream->Write("# ");
734   };
735   for (int i = 0, n = array->ElementCount(); i < n; ++i) {
736     const int c = (*array)[i].Read();
737     const bool c_is_printable = (c >= 32 && c <= 126);
738     const bool starting_new_block = ((i % kCharsPerBlock) == 0);
739     if (starting_new_block) start_new_line();
740     stream->Write(c_is_printable ? static_cast<char>(c)
741                                  : kStandInForNonPrintableChar);
742   }
743 }
744 
745 // Writes an array to a text stream.  This writes the array in a format
746 // compatible with ReadArrayFromTextStream, above.  For multiline output, writes
747 // one element per line.
748 //
749 // TODO(bolms): Make the output for arrays of small elements (like bytes) much
750 // more compact.
751 //
752 // This will require several support functions like `MaxTextLength` on every
753 // view type, and will substantially increase the number of tests required for
754 // this function, but will make arrays of small elements much more readable.
755 template <class Array, class Stream>
756 void WriteArrayToTextStream(Array *array, Stream *stream,
757                             const TextOutputOptions &options) {
758   TextOutputOptions element_options = options.PlusOneIndent();
759   if (options.multiline()) {
760     stream->Write("{");
761     WriteShorthandArrayCommentToTextStream(array, stream, element_options);
762     for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
763       if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
764           (*array)[i].Ok()) {
765         stream->Write("\n");
766         stream->Write(element_options.current_indent());
767         stream->Write("[");
768         // TODO(bolms): Put padding in here so that array elements start at the
769         // same column.
770         //
771         // TODO(bolms): (Maybe) figure out how to get padding to work so that
772         // elements with comments can have their comments align to the same
773         // column.
774         WriteIntegerToTextStream(i, stream, options.numeric_base(),
775                                  options.digit_grouping());
776         stream->Write("]: ");
777         (*array)[i].WriteToTextStream(stream, element_options);
778       } else if (element_options.comments()) {
779         stream->Write("\n");
780         stream->Write(element_options.current_indent());
781         stream->Write("# [");
782         WriteIntegerToTextStream(i, stream, options.numeric_base(),
783                                  options.digit_grouping());
784         stream->Write("]: UNREADABLE");
785       }
786     }
787     stream->Write("\n");
788     stream->Write(options.current_indent());
789     stream->Write("}");
790   } else {
791     stream->Write("{");
792     bool skipped_unreadable = false;
793     for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
794       if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
795           (*array)[i].Ok()) {
796         stream->Write(" ");
797         if (i % 8 == 0 || skipped_unreadable) {
798           stream->Write("[");
799           WriteIntegerToTextStream(i, stream, options.numeric_base(),
800                                    options.digit_grouping());
801           stream->Write("]: ");
802         }
803         (*array)[i].WriteToTextStream(stream, element_options);
804         if (i < array->ElementCount() - 1) {
805           stream->Write(",");
806         }
807         skipped_unreadable = false;
808       } else {
809         if (element_options.comments()) {
810           stream->Write(" # ");
811           if (i % 8 == 0) {
812             stream->Write("[");
813             WriteIntegerToTextStream(i, stream, options.numeric_base(),
814                                      options.digit_grouping());
815             stream->Write("]: ");
816           }
817           stream->Write("UNREADABLE\n");
818         }
819         skipped_unreadable = true;
820       }
821     }
822     stream->Write(" }");
823   }
824 }
825 
// TextStream puts a stream-like interface onto a std::string, for use by
// UpdateFromTextStream.  It is used by UpdateFromText().
//
// A TextStream neither copies nor owns the text it reads; it only stores a
// pointer and a length.  The underlying character buffer must therefore stay
// alive and unmodified for as long as the TextStream is in use.
class TextStream final {
 public:
  // Wraps any string-like object that exposes data() and size().  This
  // template handles std::string, std::string_view, and absl::string_view.
  template <class String>
  inline explicit TextStream(const String &text)
      : text_(text.data()), length_(text.size()) {}

  // Wraps a NUL-terminated C string.  A null pointer is treated as an empty
  // stream instead of being passed to strlen(), which would be undefined
  // behavior.
  inline explicit TextStream(const char *text)
      : text_(text), length_(text == nullptr ? 0 : ::std::strlen(text)) {}

  // Wraps the first `length` characters starting at `text`.  The caller is
  // responsible for `text` pointing at at least `length` readable characters.
  inline TextStream(const char *text, ::std::size_t length)
      : text_(text), length_(length) {}

  // Reads the next character into *result and advances the stream.  Returns
  // false, leaving *result untouched, once the end of the text is reached.
  inline bool Read(char *result) {
    if (index_ >= length_) return false;
    *result = text_[index_];
    ++index_;
    return true;
  }

  // Steps the stream back by one character.  Succeeds only if at least one
  // character has been read and `c` equals the most recently read character;
  // otherwise returns false and leaves the position unchanged.
  inline bool Unread(char c) {
    if (index_ == 0) return false;
    if (text_[index_ - 1] != c) return false;
    --index_;
    return true;
  }

 private:
  // It would be nice to use string_view here, but that's not available until
  // C++17.
  const char *text_ = nullptr;   // Start of the (non-owned) text.
  ::std::size_t length_ = 0;     // Number of readable characters at text_.
  ::std::size_t index_ = 0;      // Position of the next character to Read().
};
862 
863 }  // namespace support
864 
865 // Returns a TextOutputOptions set for reasonable multi-line text output.
866 static inline TextOutputOptions MultilineText() {
867   return TextOutputOptions()
868       .Multiline(true)
869       .WithIndent("  ")
870       .WithComments(true)
871       .WithDigitGrouping(true);
872 }
873 
874 // TODO(bolms): Add corresponding ReadFromText*() verbs which enforce the
875 // constraint that all of a field's dependencies must be present in the text
876 // before the field itself is set.
877 template <typename EmbossViewType>
878 inline bool UpdateFromText(const EmbossViewType &view,
879                            const ::std::string &text) {
880   auto text_stream = support::TextStream{text};
881   return view.UpdateFromTextStream(&text_stream);
882 }
883 
884 template <typename EmbossViewType>
885 inline ::std::string WriteToString(const EmbossViewType &view,
886                                    TextOutputOptions options) {
887   support::TextOutputStream text_stream;
888   view.WriteToTextStream(&text_stream, options);
889   return text_stream.Result();
890 }
891 
892 template <typename EmbossViewType>
893 inline ::std::string WriteToString(const EmbossViewType &view) {
894   return WriteToString(view, TextOutputOptions());
895 }
896 
897 }  // namespace emboss
898 
899 #endif  // EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
900