xref: /aosp_15_r20/external/emboss/runtime/cpp/emboss_text_util.h (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1*99e0aae7SDavid Rees // Copyright 2019 Google LLC
2*99e0aae7SDavid Rees //
3*99e0aae7SDavid Rees // Licensed under the Apache License, Version 2.0 (the "License");
4*99e0aae7SDavid Rees // you may not use this file except in compliance with the License.
5*99e0aae7SDavid Rees // You may obtain a copy of the License at
6*99e0aae7SDavid Rees //
7*99e0aae7SDavid Rees //     https://www.apache.org/licenses/LICENSE-2.0
8*99e0aae7SDavid Rees //
9*99e0aae7SDavid Rees // Unless required by applicable law or agreed to in writing, software
10*99e0aae7SDavid Rees // distributed under the License is distributed on an "AS IS" BASIS,
11*99e0aae7SDavid Rees // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*99e0aae7SDavid Rees // See the License for the specific language governing permissions and
13*99e0aae7SDavid Rees // limitations under the License.
14*99e0aae7SDavid Rees 
15*99e0aae7SDavid Rees // This header contains functionality related to Emboss text output.
16*99e0aae7SDavid Rees #ifndef EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
17*99e0aae7SDavid Rees #define EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
18*99e0aae7SDavid Rees 
19*99e0aae7SDavid Rees #include <array>
20*99e0aae7SDavid Rees #include <climits>
21*99e0aae7SDavid Rees #include <cmath>
22*99e0aae7SDavid Rees #include <cstdint>
23*99e0aae7SDavid Rees #include <cstdio>
24*99e0aae7SDavid Rees #include <cstring>
25*99e0aae7SDavid Rees #include <limits>
26*99e0aae7SDavid Rees #include <sstream>
27*99e0aae7SDavid Rees #include <string>
28*99e0aae7SDavid Rees #include <vector>
29*99e0aae7SDavid Rees 
30*99e0aae7SDavid Rees #include "runtime/cpp/emboss_defines.h"
31*99e0aae7SDavid Rees 
32*99e0aae7SDavid Rees namespace emboss {
33*99e0aae7SDavid Rees 
// TextOutputOptions configures how Emboss structures are rendered as text.
// A default-constructed TextOutputOptions() requests compact, single-line,
// base-10 output with no comments and no digit grouping.
//
// The class is used as an immutable value: every With*/Multiline/PlusOneIndent
// method returns a modified copy and leaves the receiver untouched, so calls
// can be chained, e.g. TextOutputOptions().Multiline(true).WithIndent("  ").
class TextOutputOptions final {
 public:
  TextOutputOptions() = default;

  // Returns a copy whose current indentation is one indent() unit deeper.
  TextOutputOptions PlusOneIndent() const {
    TextOutputOptions result = *this;
    result.current_indent_ += indent_;
    return result;
  }

  // Returns a copy with multi-line output switched on or off.
  TextOutputOptions Multiline(bool new_value) const {
    TextOutputOptions result = *this;
    result.multiline_ = new_value;
    return result;
  }

  // Returns a copy that uses `new_value` as the string appended by each
  // PlusOneIndent() call.
  TextOutputOptions WithIndent(::std::string new_value) const {
    TextOutputOptions result = *this;
    result.indent_ = ::std::move(new_value);
    return result;
  }

  // Returns a copy with comment output switched on or off.
  TextOutputOptions WithComments(bool new_value) const {
    TextOutputOptions result = *this;
    result.comments_ = new_value;
    return result;
  }

  // Returns a copy with digit grouping switched on or off.
  TextOutputOptions WithDigitGrouping(bool new_value) const {
    TextOutputOptions result = *this;
    result.digit_grouping_ = new_value;
    return result;
  }

  // Returns a copy that renders numbers in base `new_value`.  The parameter is
  // spelled ::std::uint8_t (matching numeric_base()) because <cstdint> only
  // guarantees the name inside namespace std.
  TextOutputOptions WithNumericBase(::std::uint8_t new_value) const {
    TextOutputOptions result = *this;
    result.numeric_base_ = new_value;
    return result;
  }

  // Returns a copy with the allow-partial-output flag set to `new_value`.
  TextOutputOptions WithAllowPartialOutput(bool new_value) const {
    TextOutputOptions result = *this;
    result.allow_partial_output_ = new_value;
    return result;
  }

  // Accessors.  The string accessors return copies.
  ::std::string current_indent() const { return current_indent_; }
  ::std::string indent() const { return indent_; }
  bool multiline() const { return multiline_; }
  bool digit_grouping() const { return digit_grouping_; }
  bool comments() const { return comments_; }
  ::std::uint8_t numeric_base() const { return numeric_base_; }
  bool allow_partial_output() const { return allow_partial_output_; }

 private:
  ::std::string indent_;          // One level of indentation.
  ::std::string current_indent_;  // Accumulated indentation so far.
  bool comments_ = false;
  bool multiline_ = false;
  bool digit_grouping_ = false;
  bool allow_partial_output_ = false;
  ::std::uint8_t numeric_base_ = 10;
};
100*99e0aae7SDavid Rees 
101*99e0aae7SDavid Rees namespace support {
102*99e0aae7SDavid Rees 
// TextOutputStream puts a stream-like interface onto a std::string, for use by
// DumpToTextStream.  It is used by WriteToString().
class TextOutputStream final {
 public:
  inline explicit TextOutputStream() = default;

  // Appends the full contents of `text` (text.size() bytes) to the output.
  inline void Write(const ::std::string &text) {
    text_.write(text.data(), static_cast< ::std::streamsize>(text.size()));
  }

  // Appends a NUL-terminated C string to the output.  `text` must not be
  // null.  strlen is qualified because <cstring> only guarantees the name in
  // namespace std.
  inline void Write(const char *text) {
    text_.write(text, static_cast< ::std::streamsize>(::std::strlen(text)));
  }

  // Appends a single character to the output.
  inline void Write(const char c) { text_.put(c); }

  // Returns a copy of everything written so far.  Declared const so that the
  // accumulated text can be read through a const reference.
  inline ::std::string Result() const { return text_.str(); }

 private:
  ::std::ostringstream text_;
};
122*99e0aae7SDavid Rees 
// DecodeInteger decodes an integer from a string.  This is very similar to the
// many, many existing integer decode routines in the world, except that a) it
// accepts integers in any Emboss format, and b) it can run in environments that
// do not support std::istream or Google's number conversion routines.
//
// Accepted syntax: an optional leading '-' (signed IntType only), an optional
// "0x"/"0X" (hexadecimal) or "0b"/"0B" (binary) prefix, then digits of the
// selected base, optionally separated by '_'.  Without a prefix the number is
// decimal.
//
// Returns true and stores the value in *result on success.  Returns false,
// leaving *result untouched, if the text is empty, contains no digits at all
// (e.g. "-", "0x", "0x_"), contains a character that is not a digit of the
// selected base, starts with '_', or does not fit in IntType.
//
// Ideally, this would be replaced by someone else's code.
template <class IntType>
bool DecodeInteger(const ::std::string &text, IntType *result) {
  IntType accumulator = 0;
  IntType base = 10;
  bool negative = false;
  bool saw_digit = false;
  ::std::size_t offset = 0;
  if (::std::is_signed<IntType>::value && text.size() >= 1 + offset &&
      text[offset] == '-') {
    negative = true;
    offset += 1;
  }
  if (text.size() >= 2 + offset && text[offset] == '0') {
    const char marker = text[offset + 1];
    if (marker == 'x' || marker == 'X') {
      base = 16;
      offset += 2;
    } else if (marker == 'b' || marker == 'B') {
      base = 2;
      offset += 2;
    }
  }
  // "", "0x", "0b", "-", "-0x", and "-0b" are not valid numbers.
  if (offset == text.size()) return false;
  for (; offset < text.size(); ++offset) {
    const char c = text[offset];
    IntType digit = 0;
    if (c == '_') {
      // A separator may not be the very first character of the text.
      if (offset == 0) {
        return false;
      }
      continue;
    } else if (c >= '0' && c <= '9') {
      digit = static_cast<IntType>(c - '0');
    } else if (c >= 'A' && c <= 'F') {
      digit = static_cast<IntType>(c - 'A' + 10);
    } else if (c >= 'a' && c <= 'f') {
      digit = static_cast<IntType>(c - 'a' + 10);
    } else {
      return false;
    }
    if (digit >= base) {
      return false;
    }
    saw_digit = true;
    // Negative values are accumulated downwards so that the most negative
    // value of IntType can be decoded without overflowing.  In both branches
    // the range check is performed before the multiply-and-add.
    if (negative) {
      if (accumulator <
          (::std::numeric_limits<IntType>::min() + digit) / base) {
        return false;
      }
      accumulator = accumulator * base - digit;
    } else {
      if (accumulator >
          (::std::numeric_limits<IntType>::max() - digit) / base) {
        return false;
      }
      accumulator = accumulator * base + digit;
    }
  }
  // Text made only of separators after the sign/prefix ("0x_", "-_") carries
  // no value and is rejected rather than silently decoded as zero.
  if (!saw_digit) return false;
  *result = accumulator;
  return true;
}
188*99e0aae7SDavid Rees 
// Consumes spaces, tabs, line breaks, and '#'-to-end-of-line comments from
// `stream`, stopping in front of the first character that is none of those.
//
// Returns true if the input ran out while skipping; otherwise returns the
// result of pushing the stopping character back with stream->Unread().
template <class Stream>
bool DiscardWhitespace(Stream *stream) {
  bool inside_comment = false;
  for (;;) {
    char ch;
    if (!stream->Read(&ch)) return true;
    if (ch == '\n' || ch == '\r') {
      // A line break always ends a comment and is itself skippable.
      inside_comment = false;
      continue;
    }
    if (ch == '#') {
      inside_comment = true;
      continue;
    }
    if (inside_comment || ch == ' ' || ch == '\t') continue;
    // First significant character: hand it back to the stream.
    return stream->Unread(ch);
  }
}
200*99e0aae7SDavid Rees 
// Reads the next token from `stream` into *token, after skipping leading
// whitespace and comments.
//
// A token is either a single punctuation character (one of ":{}[],") or a
// maximal run of characters ending at whitespace, '#', punctuation, or end of
// input.  At end of input *token is set to the empty string and true is
// returned.  Returns false, without assigning *token, if whitespace skipping
// fails or if the terminating character cannot be pushed back.
template <class Stream>
bool ReadToken(Stream *stream, ::std::string *token) {
  if (!DiscardWhitespace(stream)) return false;

  char c;
  if (!stream->Read(&c)) {
    token->clear();
    return true;
  }

  const char *const punctuation = ":{}[],";
  if (strchr(punctuation, c) != nullptr) {
    token->assign(1, c);
    return true;
  }

  // TODO(bolms): Only allow alphanumeric characters here?
  ::std::string text;
  for (;;) {
    text.push_back(c);
    if (!stream->Read(&c)) break;  // End of input also ends the token.
    const bool ends_token = c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                            c == '#' || strchr(punctuation, c) != nullptr;
    if (ends_token) {
      // The terminator belongs to whatever follows; put it back.
      if (!stream->Unread(c)) return false;
      break;
    }
  }
  token->assign(text);
  return true;
}
230*99e0aae7SDavid Rees 
231*99e0aae7SDavid Rees template <class Stream, class View>
ReadIntegerFromTextStream(View * view,Stream * stream)232*99e0aae7SDavid Rees bool ReadIntegerFromTextStream(View *view, Stream *stream) {
233*99e0aae7SDavid Rees   ::std::string token;
234*99e0aae7SDavid Rees   if (!::emboss::support::ReadToken(stream, &token)) return false;
235*99e0aae7SDavid Rees   if (token.empty()) return false;
236*99e0aae7SDavid Rees   typename View::ValueType value;
237*99e0aae7SDavid Rees   if (!::emboss::support::DecodeInteger(token, &value)) return false;
238*99e0aae7SDavid Rees   return view->TryToWrite(value);
239*99e0aae7SDavid Rees }
240*99e0aae7SDavid Rees 
241*99e0aae7SDavid Rees // WriteIntegerToTextStream encodes the given value in base 2, 10, or 16, with
242*99e0aae7SDavid Rees // or without digit group separators ('_'), and then calls stream->Write() with
243*99e0aae7SDavid Rees // a char * argument that is a C-style null-terminated string of the encoded
244*99e0aae7SDavid Rees // number.
245*99e0aae7SDavid Rees //
246*99e0aae7SDavid Rees // As with DecodeInteger, above, it would be nice to be able to replace this
247*99e0aae7SDavid Rees // with someone else's code, but I (bolms@) was unable to find anything in
248*99e0aae7SDavid Rees // standard C++ that would encode numbers in binary, nothing that would add
249*99e0aae7SDavid Rees // digit separators to hex numbers, and nothing that would use '_' for digit
250*99e0aae7SDavid Rees // separators.
251*99e0aae7SDavid Rees template <class Stream, typename IntegralType>
WriteIntegerToTextStream(IntegralType value,Stream * stream,::std::uint8_t base,bool digit_grouping)252*99e0aae7SDavid Rees void WriteIntegerToTextStream(IntegralType value, Stream *stream,
253*99e0aae7SDavid Rees                               ::std::uint8_t base, bool digit_grouping) {
254*99e0aae7SDavid Rees   static_assert(::std::numeric_limits<
255*99e0aae7SDavid Rees                     typename ::std::remove_cv<IntegralType>::type>::is_integer,
256*99e0aae7SDavid Rees                 "WriteIntegerToTextStream only supports integer types.");
257*99e0aae7SDavid Rees   static_assert(
258*99e0aae7SDavid Rees       !::std::is_same<bool,
259*99e0aae7SDavid Rees                       typename ::std::remove_cv<IntegralType>::type>::value,
260*99e0aae7SDavid Rees       "WriteIntegerToTextStream only supports integer types.");
261*99e0aae7SDavid Rees   EMBOSS_CHECK(base == 10 || base == 2 || base == 16);
262*99e0aae7SDavid Rees   const char *const digits = "0123456789abcdef";
263*99e0aae7SDavid Rees   const int grouping = base == 10 ? 3 : base == 16 ? 4 : 8;
264*99e0aae7SDavid Rees   // The maximum size 32-bit number is -2**31, which is:
265*99e0aae7SDavid Rees   //
266*99e0aae7SDavid Rees   // -0b10000000_00000000_00000000_00000000  (38 chars)
267*99e0aae7SDavid Rees   // -2_147_483_648  (14 chars)
268*99e0aae7SDavid Rees   // -0x8000_0000  (12 chars)
269*99e0aae7SDavid Rees   //
270*99e0aae7SDavid Rees   // Likewise, the maximum size 8-bit number is -128, which is:
271*99e0aae7SDavid Rees   // -0b10000000  (11 chars)
272*99e0aae7SDavid Rees   // -128  (4 chars)
273*99e0aae7SDavid Rees   // -0x80  (5 chars)
274*99e0aae7SDavid Rees   //
275*99e0aae7SDavid Rees   // Binary with separators is always the longest value: 9 chars per 8 bits,
276*99e0aae7SDavid Rees   // minus 1 char for the '_' that does not appear at the front of the number,
277*99e0aae7SDavid Rees   // plus 2 chars for "0b", plus 1 char for '-', plus 1 extra char for the
278*99e0aae7SDavid Rees   // trailing '\0', which is (sizeof value) * CHAR_BIT * 9 / 8 - 1 + 2 + 1 + 1.
279*99e0aae7SDavid Rees   const int buffer_size = (sizeof value) * CHAR_BIT * 9 / 8 + 3;
280*99e0aae7SDavid Rees   char buffer[buffer_size];
281*99e0aae7SDavid Rees   buffer[buffer_size - 1] = '\0';
282*99e0aae7SDavid Rees   int next_char = buffer_size - 2;
283*99e0aae7SDavid Rees   if (value == 0) {
284*99e0aae7SDavid Rees     EMBOSS_DCHECK_GE(next_char, 0);
285*99e0aae7SDavid Rees     buffer[next_char] = digits[0];
286*99e0aae7SDavid Rees     --next_char;
287*99e0aae7SDavid Rees   }
288*99e0aae7SDavid Rees   int sign = value < 0 ? -1 : 1;
289*99e0aae7SDavid Rees   int digit_count = 0;
290*99e0aae7SDavid Rees   auto buffer_char = [&](char c) {
291*99e0aae7SDavid Rees     EMBOSS_DCHECK_GE(next_char, 0);
292*99e0aae7SDavid Rees     buffer[next_char] = c;
293*99e0aae7SDavid Rees     --next_char;
294*99e0aae7SDavid Rees   };
295*99e0aae7SDavid Rees   if (value < 0) {
296*99e0aae7SDavid Rees     if (value == ::std::numeric_limits<decltype(value)>::lowest()) {
297*99e0aae7SDavid Rees       // The minimum negative two's-complement value has no corresponding
298*99e0aae7SDavid Rees       // positive value, so 'value = -value' is not useful in that case.
299*99e0aae7SDavid Rees       // Instead, we do some trickery to buffer the lowest-order digit here.
300*99e0aae7SDavid Rees       auto digit = -(value + 1) % base + 1;
301*99e0aae7SDavid Rees       value = -(value + 1) / base;
302*99e0aae7SDavid Rees       if (digit == base) {
303*99e0aae7SDavid Rees         digit = 0;
304*99e0aae7SDavid Rees         ++value;
305*99e0aae7SDavid Rees       }
306*99e0aae7SDavid Rees       buffer_char(digits[digit]);
307*99e0aae7SDavid Rees       ++digit_count;
308*99e0aae7SDavid Rees     } else {
309*99e0aae7SDavid Rees       value = -value;
310*99e0aae7SDavid Rees     }
311*99e0aae7SDavid Rees   }
312*99e0aae7SDavid Rees   while (value > 0) {
313*99e0aae7SDavid Rees     if (digit_count && digit_count % grouping == 0 && digit_grouping) {
314*99e0aae7SDavid Rees       buffer_char('_');
315*99e0aae7SDavid Rees     }
316*99e0aae7SDavid Rees     buffer_char(digits[value % base]);
317*99e0aae7SDavid Rees     value /= base;
318*99e0aae7SDavid Rees     ++digit_count;
319*99e0aae7SDavid Rees   }
320*99e0aae7SDavid Rees   if (base == 16) {
321*99e0aae7SDavid Rees     buffer_char('x');
322*99e0aae7SDavid Rees     buffer_char('0');
323*99e0aae7SDavid Rees   } else if (base == 2) {
324*99e0aae7SDavid Rees     buffer_char('b');
325*99e0aae7SDavid Rees     buffer_char('0');
326*99e0aae7SDavid Rees   }
327*99e0aae7SDavid Rees   if (sign < 0) {
328*99e0aae7SDavid Rees     buffer_char('-');
329*99e0aae7SDavid Rees   }
330*99e0aae7SDavid Rees 
331*99e0aae7SDavid Rees   stream->Write(buffer + 1 + next_char);
332*99e0aae7SDavid Rees }
333*99e0aae7SDavid Rees 
334*99e0aae7SDavid Rees // Writes an integer value in the base given in options, plus an optional
335*99e0aae7SDavid Rees // comment with the same value in a second base.  This is used for the common
336*99e0aae7SDavid Rees // output format of IntView, UIntView, and BcdView.
337*99e0aae7SDavid Rees template <class Stream, class View>
WriteIntegerViewToTextStream(View * view,Stream * stream,const TextOutputOptions & options)338*99e0aae7SDavid Rees void WriteIntegerViewToTextStream(View *view, Stream *stream,
339*99e0aae7SDavid Rees                                   const TextOutputOptions &options) {
340*99e0aae7SDavid Rees   WriteIntegerToTextStream(view->Read(), stream, options.numeric_base(),
341*99e0aae7SDavid Rees                            options.digit_grouping());
342*99e0aae7SDavid Rees   if (options.comments()) {
343*99e0aae7SDavid Rees     stream->Write("  # ");
344*99e0aae7SDavid Rees     WriteIntegerToTextStream(view->Read(), stream,
345*99e0aae7SDavid Rees                              options.numeric_base() == 10 ? 16 : 10,
346*99e0aae7SDavid Rees                              options.digit_grouping());
347*99e0aae7SDavid Rees   }
348*99e0aae7SDavid Rees }
349*99e0aae7SDavid Rees 
350*99e0aae7SDavid Rees template <class Stream, class View>
ReadBooleanFromTextStream(View * view,Stream * stream)351*99e0aae7SDavid Rees bool ReadBooleanFromTextStream(View *view, Stream *stream) {
352*99e0aae7SDavid Rees   ::std::string token;
353*99e0aae7SDavid Rees   if (!::emboss::support::ReadToken(stream, &token)) return false;
354*99e0aae7SDavid Rees   if (token == "true") {
355*99e0aae7SDavid Rees     return view->TryToWrite(true);
356*99e0aae7SDavid Rees   } else if (token == "false") {
357*99e0aae7SDavid Rees     return view->TryToWrite(false);
358*99e0aae7SDavid Rees   }
359*99e0aae7SDavid Rees   // TODO(bolms): Provide a way to get an error message on parse failure.
360*99e0aae7SDavid Rees   return false;
361*99e0aae7SDavid Rees }
362*99e0aae7SDavid Rees 
363*99e0aae7SDavid Rees // The TextOutputOptions parameter is present so that it can be passed in by
364*99e0aae7SDavid Rees // generated code that uses the same form for WriteBooleanViewToTextStream,
365*99e0aae7SDavid Rees // WriteIntegerViewToTextStream, and WriteEnumViewToTextStream.
366*99e0aae7SDavid Rees template <class Stream, class View>
WriteBooleanViewToTextStream(View * view,Stream * stream,const TextOutputOptions &)367*99e0aae7SDavid Rees void WriteBooleanViewToTextStream(View *view, Stream *stream,
368*99e0aae7SDavid Rees                                   const TextOutputOptions &) {
369*99e0aae7SDavid Rees   if (view->Read()) {
370*99e0aae7SDavid Rees     stream->Write("true");
371*99e0aae7SDavid Rees   } else {
372*99e0aae7SDavid Rees     stream->Write("false");
373*99e0aae7SDavid Rees   }
374*99e0aae7SDavid Rees }
375*99e0aae7SDavid Rees 
// FloatConstants<Float> collects the bit-level facts about an IEEE754-style
// floating-point type that the text format needs: an unsigned integer type of
// the same width, masks for the mantissa, exponent, and sign fields, and the
// printf precision / scanf format used for conversion.  The masks are mostly
// used to read and write NaNs with their payload intact, so that the text
// format can (in theory) round-trip values bit-exactly.
template <class Float>
struct FloatConstants;

// 32-bit float: 1 sign bit, 8 exponent bits, 23 mantissa bits.
template <>
struct FloatConstants<float> {
  static_assert(sizeof(float) == 4, "Emboss requires 32-bit float.");
  using MatchingIntegerType = ::std::uint32_t;
  // 0x007fffff
  static constexpr MatchingIntegerType kMantissaMask() {
    return (static_cast<MatchingIntegerType>(1) << 23) - 1;
  }
  // 0x7f800000
  static constexpr MatchingIntegerType kExponentMask() {
    return static_cast<MatchingIntegerType>(0xff) << 23;
  }
  // 0x80000000
  static constexpr MatchingIntegerType kSignMask() {
    return static_cast<MatchingIntegerType>(1) << 31;
  }
  static constexpr int kPrintfPrecision() { return 9; }
  static constexpr const char *kScanfFormat() { return "%f%n"; }
};

// 64-bit double: 1 sign bit, 11 exponent bits, 52 mantissa bits.
template <>
struct FloatConstants<double> {
  static_assert(sizeof(double) == 8, "Emboss requires 64-bit double.");
  using MatchingIntegerType = ::std::uint64_t;
  // 0x000fffffffffffff
  static constexpr MatchingIntegerType kMantissaMask() {
    return (static_cast<MatchingIntegerType>(1) << 52) - 1;
  }
  // 0x7ff0000000000000
  static constexpr MatchingIntegerType kExponentMask() {
    return static_cast<MatchingIntegerType>(0x7ff) << 52;
  }
  // 0x8000000000000000
  static constexpr MatchingIntegerType kSignMask() {
    return static_cast<MatchingIntegerType>(1) << 63;
  }
  static constexpr int kPrintfPrecision() { return 17; }
  static constexpr const char *kScanfFormat() { return "%lf%n"; }
};
411*99e0aae7SDavid Rees 
412*99e0aae7SDavid Rees // Decodes a floating-point number from text.
413*99e0aae7SDavid Rees template <class Float>
414*99e0aae7SDavid Rees bool DecodeFloat(const ::std::string &token, Float *result) {
415*99e0aae7SDavid Rees   // The state of the world for reading floating-point values is somewhat better
416*99e0aae7SDavid Rees   // than the situation for writing them, but there are still a few bits that
417*99e0aae7SDavid Rees   // are underspecified.  This function is the mirror of WriteFloatToTextStream,
418*99e0aae7SDavid Rees   // below, so it specifically decodes infinities and NaNs in the formats that
419*99e0aae7SDavid Rees   // Emboss uses.
420*99e0aae7SDavid Rees   //
421*99e0aae7SDavid Rees   // Because of the use of scanf here, this function accepts hex floating-point
422*99e0aae7SDavid Rees   // values (0xh.hhhhpeee) *on some systems*.  TODO(bolms): make hex float
423*99e0aae7SDavid Rees   // support universal.
424*99e0aae7SDavid Rees 
425*99e0aae7SDavid Rees   using UInt = typename FloatConstants<Float>::MatchingIntegerType;
426*99e0aae7SDavid Rees 
427*99e0aae7SDavid Rees   if (token.empty()) return false;
428*99e0aae7SDavid Rees 
429*99e0aae7SDavid Rees   // First, check for negative.
430*99e0aae7SDavid Rees   bool negative = token[0] == '-';
431*99e0aae7SDavid Rees 
432*99e0aae7SDavid Rees   // Second, check for NaN.
433*99e0aae7SDavid Rees   ::std::size_t i = token[0] == '-' || token[0] == '+' ? 1 : 0;
434*99e0aae7SDavid Rees   if (token.size() >= i + 3 && (token[i] == 'N' || token[i] == 'n') &&
435*99e0aae7SDavid Rees       (token[i + 1] == 'A' || token[i + 1] == 'a') &&
436*99e0aae7SDavid Rees       (token[i + 2] == 'N' || token[i + 2] == 'n')) {
437*99e0aae7SDavid Rees     UInt nan_payload;
438*99e0aae7SDavid Rees     if (token.size() >= i + 4) {
439*99e0aae7SDavid Rees       if (token[i + 3] == '(' && token[token.size() - 1] == ')') {
440*99e0aae7SDavid Rees         if (!DecodeInteger(token.substr(i + 4, token.size() - i - 5),
441*99e0aae7SDavid Rees                            &nan_payload)) {
442*99e0aae7SDavid Rees           return false;
443*99e0aae7SDavid Rees         }
444*99e0aae7SDavid Rees       } else {
445*99e0aae7SDavid Rees         // NaN may not be followed by trailing characters other than a
446*99e0aae7SDavid Rees         // ()-enclosed payload.
447*99e0aae7SDavid Rees         return false;
448*99e0aae7SDavid Rees       }
449*99e0aae7SDavid Rees     } else {
450*99e0aae7SDavid Rees       // If no specific NaN was given, take a default NaN from the C++ standard
451*99e0aae7SDavid Rees       // library.  Technically, a conformant C++ implementation might not have
452*99e0aae7SDavid Rees       // quiet_NaN(), but any IEEE754-based implementation should.
453*99e0aae7SDavid Rees       //
454*99e0aae7SDavid Rees       // It is tempting to just write the default NaN directly into the view and
455*99e0aae7SDavid Rees       // return success, but "-NaN" should be have its sign bit set, and there
456*99e0aae7SDavid Rees       // is no direct way to set the sign bit of a NaN, so there are fewer code
457*99e0aae7SDavid Rees       // paths if we extract the default NaN payload, then use it in the
458*99e0aae7SDavid Rees       // reconstruction step, below.
459*99e0aae7SDavid Rees       Float default_nan = ::std::numeric_limits<Float>::quiet_NaN();
460*99e0aae7SDavid Rees       UInt bits;
461*99e0aae7SDavid Rees       ::std::memcpy(&bits, &default_nan, sizeof(bits));
462*99e0aae7SDavid Rees       nan_payload = bits & FloatConstants<Float>::kMantissaMask();
463*99e0aae7SDavid Rees     }
464*99e0aae7SDavid Rees     if (nan_payload == 0) {
465*99e0aae7SDavid Rees       // "NaN" with a payload of zero is actually the bit pattern for infinity;
466*99e0aae7SDavid Rees       // "NaN(0)" should not be an alias for "Inf".
467*99e0aae7SDavid Rees       return false;
468*99e0aae7SDavid Rees     }
469*99e0aae7SDavid Rees     if (nan_payload & (FloatConstants<Float>::kExponentMask() |
470*99e0aae7SDavid Rees                        FloatConstants<Float>::kSignMask())) {
471*99e0aae7SDavid Rees       // The payload must be small enough to fit in the payload space; it must
472*99e0aae7SDavid Rees       // not overflow into the exponent or sign bits.
473*99e0aae7SDavid Rees       //
474*99e0aae7SDavid Rees       // Note that the DecodeInteger call which decoded the payload will return
475*99e0aae7SDavid Rees       // false if the payload would overflow the `UInt` type, so cases like
476*99e0aae7SDavid Rees       // "NaN(0x10000000000000000000000000000)" -- which are so big that they no
477*99e0aae7SDavid Rees       // longer interfere with the sign or exponent -- are caught above.
478*99e0aae7SDavid Rees       return false;
479*99e0aae7SDavid Rees     }
480*99e0aae7SDavid Rees     UInt bits = FloatConstants<Float>::kExponentMask();
481*99e0aae7SDavid Rees     bits |= nan_payload;
482*99e0aae7SDavid Rees     if (negative) {
483*99e0aae7SDavid Rees       bits |= FloatConstants<Float>::kSignMask();
484*99e0aae7SDavid Rees     }
485*99e0aae7SDavid Rees     ::std::memcpy(result, &bits, sizeof(bits));
486*99e0aae7SDavid Rees     return true;
487*99e0aae7SDavid Rees   }
488*99e0aae7SDavid Rees 
489*99e0aae7SDavid Rees   // If the value is not NaN, check for infinity.
490*99e0aae7SDavid Rees   if (token.size() >= i + 3 && (token[i] == 'I' || token[i] == 'i') &&
491*99e0aae7SDavid Rees       (token[i + 1] == 'N' || token[i + 1] == 'n') &&
492*99e0aae7SDavid Rees       (token[i + 2] == 'F' || token[i + 2] == 'f')) {
493*99e0aae7SDavid Rees     if (token.size() > i + 3) {
494*99e0aae7SDavid Rees       // Infinity must be exactly "Inf" or "-Inf" (case insensitive).  There
495*99e0aae7SDavid Rees       // must not be trailing characters.
496*99e0aae7SDavid Rees       return false;
497*99e0aae7SDavid Rees     }
498*99e0aae7SDavid Rees     // As with quiet_NaN(), a conforming C++ implementation might not have
499*99e0aae7SDavid Rees     // infinity(), but an IEEE 754-based implementation should.
500*99e0aae7SDavid Rees     if (negative) {
501*99e0aae7SDavid Rees       *result = -::std::numeric_limits<Float>::infinity();
502*99e0aae7SDavid Rees       return true;
503*99e0aae7SDavid Rees     } else {
504*99e0aae7SDavid Rees       *result = ::std::numeric_limits<Float>::infinity();
505*99e0aae7SDavid Rees       return true;
506*99e0aae7SDavid Rees     }
507*99e0aae7SDavid Rees   }
508*99e0aae7SDavid Rees 
509*99e0aae7SDavid Rees   // For non-NaN, non-Inf values, use the C scanf function, mirroring the use of
510*99e0aae7SDavid Rees   // printf for writing the value, below.
511*99e0aae7SDavid Rees   int chars_used = -1;
512*99e0aae7SDavid Rees   if (::std::sscanf(token.c_str(), FloatConstants<Float>::kScanfFormat(),
513*99e0aae7SDavid Rees                     result, &chars_used) < 1) {
514*99e0aae7SDavid Rees     return false;
515*99e0aae7SDavid Rees   }
516*99e0aae7SDavid Rees   if (chars_used < 0 ||
517*99e0aae7SDavid Rees       static_cast</**/ ::std::size_t>(chars_used) < token.size()) {
518*99e0aae7SDavid Rees     return false;
519*99e0aae7SDavid Rees   }
520*99e0aae7SDavid Rees   return true;
521*99e0aae7SDavid Rees }
522*99e0aae7SDavid Rees 
// Reads a single token from `stream`, decodes it as a floating-point value of
// type View::ValueType, and stores that value through `view`.
//
// Returns false if no token could be read, if DecodeFloat() rejects the token,
// or if view->TryToWrite() refuses the decoded value.
template <class Stream, class View>
bool ReadFloatFromTextStream(View *view, Stream *stream) {
  ::std::string text;
  if (ReadToken(stream, &text)) {
    typename View::ValueType parsed;
    if (DecodeFloat(text, &parsed)) {
      return view->TryToWrite(parsed);
    }
  }
  return false;
}
533*99e0aae7SDavid Rees 
534*99e0aae7SDavid Rees template <class Stream, class Float>
535*99e0aae7SDavid Rees void WriteFloatToTextStream(Float n, Stream *stream,
536*99e0aae7SDavid Rees                             const TextOutputOptions &options) {
537*99e0aae7SDavid Rees   static_assert(::std::is_same<Float, float>::value ||
538*99e0aae7SDavid Rees                     ::std::is_same<Float, double>::value,
539*99e0aae7SDavid Rees                 "WriteFloatToTextStream can only write float or double.");
540*99e0aae7SDavid Rees   // The state of the world w.r.t. rendering floating-points as decimal text is,
541*99e0aae7SDavid Rees   // ca. 2018, less than ideal.
542*99e0aae7SDavid Rees   //
543*99e0aae7SDavid Rees   // In C++ land, there is actually no stable facility in the standard library
544*99e0aae7SDavid Rees   // until to_chars() in C++17 -- which is not actually implemented yet in
545*99e0aae7SDavid Rees   // libc++.  to_string(), the printf() family, and the iostreams system all
546*99e0aae7SDavid Rees   // respect the current locale.  In most programs, the locale is permanently
547*99e0aae7SDavid Rees   // left on "C", but this is not guaranteed.  to_string() also uses a fixed and
548*99e0aae7SDavid Rees   // rather unfortunate format.
549*99e0aae7SDavid Rees   //
550*99e0aae7SDavid Rees   // For integers, I (bolms@) chose to just implement custom read and write
551*99e0aae7SDavid Rees   // routines, but those routines are quite small and straightforward compared
552*99e0aae7SDavid Rees   // to floating point conversion.  Even writing correct output is difficult,
553*99e0aae7SDavid Rees   // and writing correct and minimal output is the subject of a number of
554*99e0aae7SDavid Rees   // academic papers.
555*99e0aae7SDavid Rees   //
556*99e0aae7SDavid Rees   // For the moment, I'm just using snprintf("%.*g", 17, n), which is guaranteed
557*99e0aae7SDavid Rees   // to be read back as the same number, but can be longer than strictly
558*99e0aae7SDavid Rees   // necessary.
559*99e0aae7SDavid Rees   //
560*99e0aae7SDavid Rees   // TODO(bolms): Import a modified version of the double-to-string conversion
561*99e0aae7SDavid Rees   // from Swift's standard library, which appears to be best implementation
562*99e0aae7SDavid Rees   // currently available.
563*99e0aae7SDavid Rees 
564*99e0aae7SDavid Rees   if (::std::isnan(n)) {
565*99e0aae7SDavid Rees     // The printf format for NaN is just "NaN".  In the interests of keeping
566*99e0aae7SDavid Rees     // things bit-exact, Emboss prints the exact NaN.
567*99e0aae7SDavid Rees     typename FloatConstants<Float>::MatchingIntegerType bits;
568*99e0aae7SDavid Rees     ::std::memcpy(&bits, &n, sizeof(bits));
569*99e0aae7SDavid Rees     ::std::uint64_t nan_payload = bits & FloatConstants<Float>::kMantissaMask();
570*99e0aae7SDavid Rees     ::std::uint64_t nan_sign = bits & FloatConstants<Float>::kSignMask();
571*99e0aae7SDavid Rees     if (nan_sign) {
572*99e0aae7SDavid Rees       // NaN still has a sign bit, which is generally treated differently from
573*99e0aae7SDavid Rees       // the payload.  There is no real "standard" text format for NaNs, but
574*99e0aae7SDavid Rees       // "-NaN" appears to be a common way of indicating a NaN with the sign bit
575*99e0aae7SDavid Rees       // set.
576*99e0aae7SDavid Rees       stream->Write("-NaN(");
577*99e0aae7SDavid Rees     } else {
578*99e0aae7SDavid Rees       stream->Write("NaN(");
579*99e0aae7SDavid Rees     }
580*99e0aae7SDavid Rees     // NaN payloads are always dumped in hex.  Note that Emboss is treating the
581*99e0aae7SDavid Rees     // is_quiet/is_signal bit as just another bit in the payload.
582*99e0aae7SDavid Rees     WriteIntegerToTextStream(nan_payload, stream, 16, options.digit_grouping());
583*99e0aae7SDavid Rees     stream->Write(")");
584*99e0aae7SDavid Rees     return;
585*99e0aae7SDavid Rees   }
586*99e0aae7SDavid Rees 
587*99e0aae7SDavid Rees   if (::std::isinf(n)) {
588*99e0aae7SDavid Rees     if (n < 0.0) {
589*99e0aae7SDavid Rees       stream->Write("-Inf");
590*99e0aae7SDavid Rees     } else {
591*99e0aae7SDavid Rees       stream->Write("Inf");
592*99e0aae7SDavid Rees     }
593*99e0aae7SDavid Rees     return;
594*99e0aae7SDavid Rees   }
595*99e0aae7SDavid Rees 
596*99e0aae7SDavid Rees   // TODO(bolms): Should the current numeric base be honored here?  Should there
597*99e0aae7SDavid Rees   // be a separate Float numeric base?
598*99e0aae7SDavid Rees   ::std::array<char, 30> buffer;
599*99e0aae7SDavid Rees   // TODO(bolms): Figure out how to get ::std::snprintf to work on
600*99e0aae7SDavid Rees   // microcontroller builds.
601*99e0aae7SDavid Rees   ::std::size_t snprintf_result = static_cast</**/ ::std::size_t>(::snprintf(
602*99e0aae7SDavid Rees       &(buffer[0]), buffer.size(), "%.*g",
603*99e0aae7SDavid Rees       FloatConstants<Float>::kPrintfPrecision(), static_cast<double>(n)));
604*99e0aae7SDavid Rees   (void)snprintf_result;  // Unused if EMBOSS_CHECK_LE is compiled out.
605*99e0aae7SDavid Rees   EMBOSS_CHECK_LE(snprintf_result, buffer.size());
606*99e0aae7SDavid Rees   stream->Write(&buffer[0]);
607*99e0aae7SDavid Rees 
608*99e0aae7SDavid Rees   // TODO(bolms): Support digit grouping.
609*99e0aae7SDavid Rees }
610*99e0aae7SDavid Rees 
611*99e0aae7SDavid Rees template <class Stream, class View>
612*99e0aae7SDavid Rees bool ReadEnumViewFromTextStream(View *view, Stream *stream) {
613*99e0aae7SDavid Rees   ::std::string token;
614*99e0aae7SDavid Rees   if (!ReadToken(stream, &token)) return false;
615*99e0aae7SDavid Rees   if (token.empty()) return false;
616*99e0aae7SDavid Rees   if (::std::isdigit(token[0])) {
617*99e0aae7SDavid Rees     ::std::uint64_t value;
618*99e0aae7SDavid Rees     if (!DecodeInteger(token, &value)) return false;
619*99e0aae7SDavid Rees     // TODO(bolms): Fix the static_cast<ValueType> for signed ValueType.
620*99e0aae7SDavid Rees     // TODO(bolms): Should values between 2**63 and 2**64-1 actually be
621*99e0aae7SDavid Rees     // allowed in the text format when ValueType is signed?
622*99e0aae7SDavid Rees     return view->TryToWrite(static_cast<typename View::ValueType>(value));
623*99e0aae7SDavid Rees   } else if (token[0] == '-') {
624*99e0aae7SDavid Rees     ::std::int64_t value;
625*99e0aae7SDavid Rees     if (!DecodeInteger(token, &value)) return false;
626*99e0aae7SDavid Rees     return view->TryToWrite(static_cast<typename View::ValueType>(value));
627*99e0aae7SDavid Rees   } else {
628*99e0aae7SDavid Rees     typename View::ValueType value;
629*99e0aae7SDavid Rees     if (!TryToGetEnumFromName(token.c_str(), &value)) return false;
630*99e0aae7SDavid Rees     return view->TryToWrite(value);
631*99e0aae7SDavid Rees   }
632*99e0aae7SDavid Rees }
633*99e0aae7SDavid Rees 
634*99e0aae7SDavid Rees template <class Stream, class View>
635*99e0aae7SDavid Rees void WriteEnumViewToTextStream(View *view, Stream *stream,
636*99e0aae7SDavid Rees                                const TextOutputOptions &options) {
637*99e0aae7SDavid Rees   const char *name = TryToGetNameFromEnum(view->Read());
638*99e0aae7SDavid Rees   if (name != nullptr) {
639*99e0aae7SDavid Rees     stream->Write(name);
640*99e0aae7SDavid Rees   }
641*99e0aae7SDavid Rees   // If the enum value has no known name, then write its numeric value
642*99e0aae7SDavid Rees   // instead.  If it does have a known name, and comments are enabled on the
643*99e0aae7SDavid Rees   // output, then write the numeric value as a comment.
644*99e0aae7SDavid Rees   if (name == nullptr || options.comments()) {
645*99e0aae7SDavid Rees     if (name != nullptr) stream->Write("  # ");
646*99e0aae7SDavid Rees     WriteIntegerToTextStream(
647*99e0aae7SDavid Rees         static_cast<
648*99e0aae7SDavid Rees             typename ::std::underlying_type<typename View::ValueType>::type>(
649*99e0aae7SDavid Rees             view->Read()),
650*99e0aae7SDavid Rees         stream, options.numeric_base(), options.digit_grouping());
651*99e0aae7SDavid Rees   }
652*99e0aae7SDavid Rees }
653*99e0aae7SDavid Rees 
654*99e0aae7SDavid Rees // Updates an array from a text stream.  For an array of integers, the most
655*99e0aae7SDavid Rees // basic form of the text format looks like:
656*99e0aae7SDavid Rees //
657*99e0aae7SDavid Rees // { 0, 1, 2 }
658*99e0aae7SDavid Rees //
659*99e0aae7SDavid Rees // However, the following are all acceptable and equivalent:
660*99e0aae7SDavid Rees //
661*99e0aae7SDavid Rees // { 0, 1, 2, }
662*99e0aae7SDavid Rees // {0 1 2}
663*99e0aae7SDavid Rees // { [2]: 2, [1]: 1, [0]: 0 }
664*99e0aae7SDavid Rees // {[2]:2, [0]:0, 1}
665*99e0aae7SDavid Rees //
666*99e0aae7SDavid Rees // Formally, the array must be contained within braces ("{}").  Elements are
667*99e0aae7SDavid Rees // represented as an optional index surrounded by brackets ("[]") followed by
668*99e0aae7SDavid Rees // the text format of the element, followed by a single optional comma (",").
669*99e0aae7SDavid Rees // If no index is present for the first element, the index 0 will be used.  If
670*99e0aae7SDavid Rees // no index is present for any elements after the first, the index one greater
671*99e0aae7SDavid Rees // than the previous index will be used.
672*99e0aae7SDavid Rees template <class Array, class Stream>
673*99e0aae7SDavid Rees bool ReadArrayFromTextStream(Array *array, Stream *stream) {
674*99e0aae7SDavid Rees   // The text format allows any given index to be set more than once.  In
675*99e0aae7SDavid Rees   // theory, this function could track indices and fail if an index were
676*99e0aae7SDavid Rees   // double-set, but doing so would require quite a bit of overhead, and
677*99e0aae7SDavid Rees   // O(array->ElementCount()) extra space in the worst case.  It does not seem
678*99e0aae7SDavid Rees   // worth it to impose the runtime cost here.
679*99e0aae7SDavid Rees   ::std::size_t index = 0;
680*99e0aae7SDavid Rees   ::std::string brace;
681*99e0aae7SDavid Rees   // Read out the opening brace.
682*99e0aae7SDavid Rees   if (!ReadToken(stream, &brace)) return false;
683*99e0aae7SDavid Rees   if (brace != "{") return false;
684*99e0aae7SDavid Rees   for (;;) {
685*99e0aae7SDavid Rees     char c;
686*99e0aae7SDavid Rees     // Check for a closing brace; if present, success.
687*99e0aae7SDavid Rees     if (!DiscardWhitespace(stream)) return false;
688*99e0aae7SDavid Rees     if (!stream->Read(&c)) return false;
689*99e0aae7SDavid Rees     if (c == '}') return true;
690*99e0aae7SDavid Rees 
691*99e0aae7SDavid Rees     // If the element has an index, read it.
692*99e0aae7SDavid Rees     if (c == '[') {
693*99e0aae7SDavid Rees       ::std::string index_text;
694*99e0aae7SDavid Rees       if (!ReadToken(stream, &index_text)) return false;
695*99e0aae7SDavid Rees       if (!::emboss::support::DecodeInteger(index_text, &index)) return false;
696*99e0aae7SDavid Rees       ::std::string closing_bracket;
697*99e0aae7SDavid Rees       if (!ReadToken(stream, &closing_bracket)) return false;
698*99e0aae7SDavid Rees       if (closing_bracket != "]") return false;
699*99e0aae7SDavid Rees       ::std::string colon;
700*99e0aae7SDavid Rees       if (!ReadToken(stream, &colon)) return false;
701*99e0aae7SDavid Rees       if (colon != ":") return false;
702*99e0aae7SDavid Rees     } else {
703*99e0aae7SDavid Rees       if (!stream->Unread(c)) return false;
704*99e0aae7SDavid Rees     }
705*99e0aae7SDavid Rees 
706*99e0aae7SDavid Rees     // Read the element.
707*99e0aae7SDavid Rees     if (index >= array->ElementCount()) return false;
708*99e0aae7SDavid Rees     if (!(*array)[index].UpdateFromTextStream(stream)) return false;
709*99e0aae7SDavid Rees     ++index;
710*99e0aae7SDavid Rees 
711*99e0aae7SDavid Rees     // If there is a trailing comma, discard it.
712*99e0aae7SDavid Rees     if (!DiscardWhitespace(stream)) return false;
713*99e0aae7SDavid Rees     if (!stream->Read(&c)) return false;
714*99e0aae7SDavid Rees     if (c != ',') {
715*99e0aae7SDavid Rees       if (c != '}') return false;
716*99e0aae7SDavid Rees       if (!stream->Unread(c)) return false;
717*99e0aae7SDavid Rees     }
718*99e0aae7SDavid Rees   }
719*99e0aae7SDavid Rees }
720*99e0aae7SDavid Rees 
721*99e0aae7SDavid Rees // Prints out the elements of an 8-bit Int or UInt array as characters.
722*99e0aae7SDavid Rees template <class Array, class Stream>
723*99e0aae7SDavid Rees void WriteShorthandAsciiArrayCommentToTextStream(
724*99e0aae7SDavid Rees     const Array *array, Stream *stream, const TextOutputOptions &options) {
725*99e0aae7SDavid Rees   if (!options.multiline()) return;
726*99e0aae7SDavid Rees   if (!options.comments()) return;
727*99e0aae7SDavid Rees   if (array->ElementCount() == 0) return;
728*99e0aae7SDavid Rees   static constexpr int kCharsPerBlock = 64;
729*99e0aae7SDavid Rees   static constexpr char kStandInForNonPrintableChar = '.';
730*99e0aae7SDavid Rees   auto start_new_line = [&]() {
731*99e0aae7SDavid Rees     stream->Write("\n");
732*99e0aae7SDavid Rees     stream->Write(options.current_indent());
733*99e0aae7SDavid Rees     stream->Write("# ");
734*99e0aae7SDavid Rees   };
735*99e0aae7SDavid Rees   for (int i = 0, n = array->ElementCount(); i < n; ++i) {
736*99e0aae7SDavid Rees     const int c = (*array)[i].Read();
737*99e0aae7SDavid Rees     const bool c_is_printable = (c >= 32 && c <= 126);
738*99e0aae7SDavid Rees     const bool starting_new_block = ((i % kCharsPerBlock) == 0);
739*99e0aae7SDavid Rees     if (starting_new_block) start_new_line();
740*99e0aae7SDavid Rees     stream->Write(c_is_printable ? static_cast<char>(c)
741*99e0aae7SDavid Rees                                  : kStandInForNonPrintableChar);
742*99e0aae7SDavid Rees   }
743*99e0aae7SDavid Rees }
744*99e0aae7SDavid Rees 
745*99e0aae7SDavid Rees // Writes an array to a text stream.  This writes the array in a format
746*99e0aae7SDavid Rees // compatible with ReadArrayFromTextStream, above.  For multiline output, writes
747*99e0aae7SDavid Rees // one element per line.
748*99e0aae7SDavid Rees //
749*99e0aae7SDavid Rees // TODO(bolms): Make the output for arrays of small elements (like bytes) much
750*99e0aae7SDavid Rees // more compact.
751*99e0aae7SDavid Rees //
752*99e0aae7SDavid Rees // This will require several support functions like `MaxTextLength` on every
753*99e0aae7SDavid Rees // view type, and will substantially increase the number of tests required for
754*99e0aae7SDavid Rees // this function, but will make arrays of small elements much more readable.
755*99e0aae7SDavid Rees template <class Array, class Stream>
756*99e0aae7SDavid Rees void WriteArrayToTextStream(Array *array, Stream *stream,
757*99e0aae7SDavid Rees                             const TextOutputOptions &options) {
758*99e0aae7SDavid Rees   TextOutputOptions element_options = options.PlusOneIndent();
759*99e0aae7SDavid Rees   if (options.multiline()) {
760*99e0aae7SDavid Rees     stream->Write("{");
761*99e0aae7SDavid Rees     WriteShorthandArrayCommentToTextStream(array, stream, element_options);
762*99e0aae7SDavid Rees     for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
763*99e0aae7SDavid Rees       if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
764*99e0aae7SDavid Rees           (*array)[i].Ok()) {
765*99e0aae7SDavid Rees         stream->Write("\n");
766*99e0aae7SDavid Rees         stream->Write(element_options.current_indent());
767*99e0aae7SDavid Rees         stream->Write("[");
768*99e0aae7SDavid Rees         // TODO(bolms): Put padding in here so that array elements start at the
769*99e0aae7SDavid Rees         // same column.
770*99e0aae7SDavid Rees         //
771*99e0aae7SDavid Rees         // TODO(bolms): (Maybe) figure out how to get padding to work so that
772*99e0aae7SDavid Rees         // elements with comments can have their comments align to the same
773*99e0aae7SDavid Rees         // column.
774*99e0aae7SDavid Rees         WriteIntegerToTextStream(i, stream, options.numeric_base(),
775*99e0aae7SDavid Rees                                  options.digit_grouping());
776*99e0aae7SDavid Rees         stream->Write("]: ");
777*99e0aae7SDavid Rees         (*array)[i].WriteToTextStream(stream, element_options);
778*99e0aae7SDavid Rees       } else if (element_options.comments()) {
779*99e0aae7SDavid Rees         stream->Write("\n");
780*99e0aae7SDavid Rees         stream->Write(element_options.current_indent());
781*99e0aae7SDavid Rees         stream->Write("# [");
782*99e0aae7SDavid Rees         WriteIntegerToTextStream(i, stream, options.numeric_base(),
783*99e0aae7SDavid Rees                                  options.digit_grouping());
784*99e0aae7SDavid Rees         stream->Write("]: UNREADABLE");
785*99e0aae7SDavid Rees       }
786*99e0aae7SDavid Rees     }
787*99e0aae7SDavid Rees     stream->Write("\n");
788*99e0aae7SDavid Rees     stream->Write(options.current_indent());
789*99e0aae7SDavid Rees     stream->Write("}");
790*99e0aae7SDavid Rees   } else {
791*99e0aae7SDavid Rees     stream->Write("{");
792*99e0aae7SDavid Rees     bool skipped_unreadable = false;
793*99e0aae7SDavid Rees     for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
794*99e0aae7SDavid Rees       if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
795*99e0aae7SDavid Rees           (*array)[i].Ok()) {
796*99e0aae7SDavid Rees         stream->Write(" ");
797*99e0aae7SDavid Rees         if (i % 8 == 0 || skipped_unreadable) {
798*99e0aae7SDavid Rees           stream->Write("[");
799*99e0aae7SDavid Rees           WriteIntegerToTextStream(i, stream, options.numeric_base(),
800*99e0aae7SDavid Rees                                    options.digit_grouping());
801*99e0aae7SDavid Rees           stream->Write("]: ");
802*99e0aae7SDavid Rees         }
803*99e0aae7SDavid Rees         (*array)[i].WriteToTextStream(stream, element_options);
804*99e0aae7SDavid Rees         if (i < array->ElementCount() - 1) {
805*99e0aae7SDavid Rees           stream->Write(",");
806*99e0aae7SDavid Rees         }
807*99e0aae7SDavid Rees         skipped_unreadable = false;
808*99e0aae7SDavid Rees       } else {
809*99e0aae7SDavid Rees         if (element_options.comments()) {
810*99e0aae7SDavid Rees           stream->Write(" # ");
811*99e0aae7SDavid Rees           if (i % 8 == 0) {
812*99e0aae7SDavid Rees             stream->Write("[");
813*99e0aae7SDavid Rees             WriteIntegerToTextStream(i, stream, options.numeric_base(),
814*99e0aae7SDavid Rees                                      options.digit_grouping());
815*99e0aae7SDavid Rees             stream->Write("]: ");
816*99e0aae7SDavid Rees           }
817*99e0aae7SDavid Rees           stream->Write("UNREADABLE\n");
818*99e0aae7SDavid Rees         }
819*99e0aae7SDavid Rees         skipped_unreadable = true;
820*99e0aae7SDavid Rees       }
821*99e0aae7SDavid Rees     }
822*99e0aae7SDavid Rees     stream->Write(" }");
823*99e0aae7SDavid Rees   }
824*99e0aae7SDavid Rees }
825*99e0aae7SDavid Rees 
// TextStream puts a stream-like interface onto a std::string, for use by
// UpdateFromTextStream.  It is used by UpdateFromText().
//
// TextStream does not copy or own the text: it only keeps a pointer and a
// length, so the underlying characters must outlive the TextStream.
class TextStream final {
 public:
  // This template handles std::string, std::string_view, and absl::string_view.
  template <class String>
  inline explicit TextStream(const String &text)
      : text_(text.data()), length_(text.size()) {}

  // Wraps a NUL-terminated C string.  A null pointer is treated as an empty
  // stream instead of being passed to strlen().
  inline explicit TextStream(const char *text)
      : text_(text), length_(text == nullptr ? 0 : ::std::strlen(text)) {}

  // Wraps the first `length` characters at `text`.  A null pointer is treated
  // as an empty stream regardless of `length`.
  inline TextStream(const char *text, ::std::size_t length)
      : text_(text), length_(text == nullptr ? 0 : length) {}

  // Stores the next character in *result and advances past it.  Returns false,
  // leaving *result untouched, once the end of the text has been reached.
  inline bool Read(char *result) {
    if (index_ >= length_) return false;
    *result = text_[index_];
    ++index_;
    return true;
  }

  // Steps back one character.  Fails (returns false) at the start of the text,
  // or if `c` is not the character that was most recently read.
  inline bool Unread(char c) {
    if (index_ < 1) return false;
    if (text_[index_ - 1] != c) return false;
    --index_;
    return true;
  }

 private:
  // It would be nice to use string_view here, but that's not available until
  // C++17.
  const char *text_ = nullptr;
  ::std::size_t length_ = 0;
  ::std::size_t index_ = 0;
};
862*99e0aae7SDavid Rees 
863*99e0aae7SDavid Rees }  // namespace support
864*99e0aae7SDavid Rees 
865*99e0aae7SDavid Rees // Returns a TextOutputOptions set for reasonable multi-line text output.
866*99e0aae7SDavid Rees static inline TextOutputOptions MultilineText() {
867*99e0aae7SDavid Rees   return TextOutputOptions()
868*99e0aae7SDavid Rees       .Multiline(true)
869*99e0aae7SDavid Rees       .WithIndent("  ")
870*99e0aae7SDavid Rees       .WithComments(true)
871*99e0aae7SDavid Rees       .WithDigitGrouping(true);
872*99e0aae7SDavid Rees }
873*99e0aae7SDavid Rees 
874*99e0aae7SDavid Rees // TODO(bolms): Add corresponding ReadFromText*() verbs which enforce the
875*99e0aae7SDavid Rees // constraint that all of a field's dependencies must be present in the text
876*99e0aae7SDavid Rees // before the field itself is set.
877*99e0aae7SDavid Rees template <typename EmbossViewType>
878*99e0aae7SDavid Rees inline bool UpdateFromText(const EmbossViewType &view,
879*99e0aae7SDavid Rees                            const ::std::string &text) {
880*99e0aae7SDavid Rees   auto text_stream = support::TextStream{text};
881*99e0aae7SDavid Rees   return view.UpdateFromTextStream(&text_stream);
882*99e0aae7SDavid Rees }
883*99e0aae7SDavid Rees 
884*99e0aae7SDavid Rees template <typename EmbossViewType>
885*99e0aae7SDavid Rees inline ::std::string WriteToString(const EmbossViewType &view,
886*99e0aae7SDavid Rees                                    TextOutputOptions options) {
887*99e0aae7SDavid Rees   support::TextOutputStream text_stream;
888*99e0aae7SDavid Rees   view.WriteToTextStream(&text_stream, options);
889*99e0aae7SDavid Rees   return text_stream.Result();
890*99e0aae7SDavid Rees }
891*99e0aae7SDavid Rees 
892*99e0aae7SDavid Rees template <typename EmbossViewType>
893*99e0aae7SDavid Rees inline ::std::string WriteToString(const EmbossViewType &view) {
894*99e0aae7SDavid Rees   return WriteToString(view, TextOutputOptions());
895*99e0aae7SDavid Rees }
896*99e0aae7SDavid Rees 
897*99e0aae7SDavid Rees }  // namespace emboss
898*99e0aae7SDavid Rees 
899*99e0aae7SDavid Rees #endif  // EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
900