1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // This header contains functionality related to Emboss text output.
16 #ifndef EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
17 #define EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
18
19 #include <array>
20 #include <climits>
21 #include <cmath>
22 #include <cstdint>
23 #include <cstdio>
24 #include <cstring>
25 #include <limits>
26 #include <sstream>
27 #include <string>
28 #include <vector>
29
30 #include "runtime/cpp/emboss_defines.h"
31
32 namespace emboss {
33
// TextOutputOptions configures how Emboss values are rendered as text.  A
// default-constructed TextOutputOptions selects compact output (single line,
// base 10, no comments, no digit grouping, empty indent); MultilineText()
// provides reasonable formatted output.
//
// Every Multiline() / With*() / PlusOneIndent() call returns a modified copy
// and leaves *this untouched, so calls can be chained.
class TextOutputOptions final {
 public:
  TextOutputOptions() = default;

  // Returns a copy whose current_indent() is one indent() unit longer.
  TextOutputOptions PlusOneIndent() const {
    TextOutputOptions result = *this;
    result.current_indent_ += indent_;
    return result;
  }

  // Returns a copy with multiline() set to new_value.
  TextOutputOptions Multiline(bool new_value) const {
    TextOutputOptions result = *this;
    result.multiline_ = new_value;
    return result;
  }

  // Returns a copy that uses new_value as the string appended per indent
  // level by PlusOneIndent().
  TextOutputOptions WithIndent(::std::string new_value) const {
    TextOutputOptions result = *this;
    result.indent_ = ::std::move(new_value);
    return result;
  }

  // Returns a copy with comments() set to new_value.
  TextOutputOptions WithComments(bool new_value) const {
    TextOutputOptions result = *this;
    result.comments_ = new_value;
    return result;
  }

  // Returns a copy with digit_grouping() set to new_value.
  TextOutputOptions WithDigitGrouping(bool new_value) const {
    TextOutputOptions result = *this;
    result.digit_grouping_ = new_value;
    return result;
  }

  // Returns a copy with numeric_base() set to new_value.  The parameter is
  // spelled ::std::uint8_t because <cstdint> only guarantees the name in
  // namespace std.
  TextOutputOptions WithNumericBase(::std::uint8_t new_value) const {
    TextOutputOptions result = *this;
    result.numeric_base_ = new_value;
    return result;
  }

  // Returns a copy with allow_partial_output() set to new_value.
  TextOutputOptions WithAllowPartialOutput(bool new_value) const {
    TextOutputOptions result = *this;
    result.allow_partial_output_ = new_value;
    return result;
  }

  // Accessors.  The strings are returned by value.
  ::std::string current_indent() const { return current_indent_; }
  ::std::string indent() const { return indent_; }
  bool multiline() const { return multiline_; }
  bool digit_grouping() const { return digit_grouping_; }
  bool comments() const { return comments_; }
  ::std::uint8_t numeric_base() const { return numeric_base_; }
  bool allow_partial_output() const { return allow_partial_output_; }

 private:
  ::std::string indent_;          // Appended once per PlusOneIndent() call.
  ::std::string current_indent_;  // Accumulated indentation so far.
  bool comments_ = false;
  bool multiline_ = false;
  bool digit_grouping_ = false;
  bool allow_partial_output_ = false;
  ::std::uint8_t numeric_base_ = 10;
};
100
101 namespace support {
102
// TextOutputStream accumulates text in memory behind a minimal stream-like
// interface: a set of Write() overloads plus Result().  WriteToString() uses
// it to collect the output of a view's WriteToTextStream().
class TextOutputStream final {
 public:
  inline explicit TextOutputStream() = default;

  // Appends all text.size() bytes of `text`, including any embedded NULs.
  inline void Write(const ::std::string &text) {
    text_.write(text.data(),
                static_cast</**/ ::std::streamsize>(text.size()));
  }

  // Appends the NUL-terminated C string `text` (the terminator is not
  // written).
  inline void Write(const char *text) {
    text_.write(text, static_cast</**/ ::std::streamsize>(::std::strlen(text)));
  }

  // Appends the single character `c`.
  inline void Write(const char c) { text_.put(c); }

  // Returns a copy of everything written so far.
  inline ::std::string Result() const { return text_.str(); }

 private:
  ::std::ostringstream text_;
};
122
// DecodeInteger parses `text` as an integer in any Emboss text format and, on
// success, stores the value in `*result` and returns true.
//
// Accepted syntax: an optional leading '-' (only when IntType is signed), an
// optional "0x"/"0X" (hexadecimal) or "0b"/"0B" (binary) prefix, then one or
// more digits of the selected base.  '_' digit separators are skipped
// wherever they appear, except that `text` may not begin with '_'.
//
// Returns false, leaving `*result` untouched, if `text` contains no digits
// (e.g. "", "-", "0x", "-0b", "-_", "0x_"), contains a character that is not a
// digit of the selected base, or denotes a value that does not fit in
// IntType.
//
// This is very similar to the many, many existing integer decode routines in
// the world, except that a) it accepts integers in any Emboss format, and b)
// it can run in environments that do not support std::istream or Google's
// number conversion routines.
//
// Ideally, this would be replaced by someone else's code.
template <class IntType>
bool DecodeInteger(const ::std::string &text, IntType *result) {
  IntType accumulator = 0;
  IntType base = 10;
  bool negative = false;
  ::std::size_t offset = 0;
  if (::std::is_signed<IntType>::value && text.size() >= 1 + offset &&
      text[offset] == '-') {
    negative = true;
    offset += 1;
  }
  if (text.size() >= 2 + offset && text[offset] == '0') {
    const char marker = text[offset + 1];
    if (marker == 'x' || marker == 'X') {
      base = 16;
      offset += 2;
    } else if (marker == 'b' || marker == 'B') {
      base = 2;
      offset += 2;
    }
  }
  // A sign, a base prefix, and separators do not make a number on their own:
  // at least one real digit must be consumed below.
  bool saw_digit = false;
  for (; offset < text.size(); ++offset) {
    const char c = text[offset];
    IntType digit = 0;
    if (c == '_') {
      if (offset == 0) {
        return false;
      }
      continue;
    } else if (c >= '0' && c <= '9') {
      digit = static_cast<IntType>(c - '0');
    } else if (c >= 'A' && c <= 'F') {
      digit = static_cast<IntType>(c - 'A' + 10);
    } else if (c >= 'a' && c <= 'f') {
      digit = static_cast<IntType>(c - 'a' + 10);
    } else {
      return false;
    }
    if (digit >= base) {
      return false;
    }
    saw_digit = true;
    if (negative) {
      // Negative values are accumulated downward so that the most negative
      // value of IntType can be represented.  The comparison rejects the
      // digit before the multiply-and-subtract could overflow.
      if (accumulator <
          (::std::numeric_limits<IntType>::min() + digit) / base) {
        return false;
      }
      accumulator = static_cast<IntType>(accumulator * base - digit);
    } else {
      // Same overflow pre-check, mirrored for the positive direction.
      if (accumulator >
          (::std::numeric_limits<IntType>::max() - digit) / base) {
        return false;
      }
      accumulator = static_cast<IntType>(accumulator * base + digit);
    }
  }
  if (!saw_digit) return false;
  *result = accumulator;
  return true;
}
188
// Consumes whitespace (' ', '\t', '\r', '\n') and '#' comments from `stream`.
// A comment runs from '#' up to the next '\r' or '\n'.
//
// Returns true if the end of input was reached while skipping.  Otherwise the
// first significant character is pushed back with stream->Unread() and the
// result of that call is returned.
template <class Stream>
bool DiscardWhitespace(Stream *stream) {
  bool inside_comment = false;
  for (;;) {
    char next;
    if (!stream->Read(&next)) return true;
    const bool is_line_end = (next == '\r' || next == '\n');
    if (next == '#') {
      inside_comment = true;
    } else if (is_line_end) {
      inside_comment = false;
    }
    if (inside_comment) continue;
    if (is_line_end || next == ' ' || next == '\t') continue;
    return stream->Unread(next);
  }
}
200
// Reads the next token from `stream` into `*token`, after skipping leading
// whitespace and comments with DiscardWhitespace().
//
// Each character of ":{}[]," is a one-character token by itself.  Any other
// token extends until whitespace, '#', one of those punctuation characters,
// or the end of input; the terminating character is pushed back onto the
// stream.  If the input is exhausted before any token starts, `*token` is set
// to the empty string and true is returned.
//
// Returns false only if DiscardWhitespace() or stream->Unread() fails.
template <class Stream>
bool ReadToken(Stream *stream, ::std::string *token) {
  if (!DiscardWhitespace(stream)) return false;

  char c;
  if (!stream->Read(&c)) {
    token->clear();
    return true;
  }

  const char *const punctuation = ":{}[],";
  if (strchr(punctuation, c) != nullptr) {
    token->assign(1, c);
    return true;
  }

  // TODO(bolms): Only allow alphanumeric characters here?
  ::std::string collected;
  for (;;) {
    collected.push_back(c);
    // Running out of input simply ends the token.
    if (!stream->Read(&c)) break;
    const bool ends_token = c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                            c == '#' || strchr(punctuation, c) != nullptr;
    if (ends_token) {
      if (!stream->Unread(c)) return false;
      break;
    }
  }
  *token = collected;
  return true;
}
230
231 template <class Stream, class View>
ReadIntegerFromTextStream(View * view,Stream * stream)232 bool ReadIntegerFromTextStream(View *view, Stream *stream) {
233 ::std::string token;
234 if (!::emboss::support::ReadToken(stream, &token)) return false;
235 if (token.empty()) return false;
236 typename View::ValueType value;
237 if (!::emboss::support::DecodeInteger(token, &value)) return false;
238 return view->TryToWrite(value);
239 }
240
241 // WriteIntegerToTextStream encodes the given value in base 2, 10, or 16, with
242 // or without digit group separators ('_'), and then calls stream->Write() with
243 // a char * argument that is a C-style null-terminated string of the encoded
244 // number.
245 //
246 // As with DecodeInteger, above, it would be nice to be able to replace this
247 // with someone else's code, but I (bolms@) was unable to find anything in
248 // standard C++ that would encode numbers in binary, nothing that would add
249 // digit separators to hex numbers, and nothing that would use '_' for digit
250 // separators.
251 template <class Stream, typename IntegralType>
WriteIntegerToTextStream(IntegralType value,Stream * stream,::std::uint8_t base,bool digit_grouping)252 void WriteIntegerToTextStream(IntegralType value, Stream *stream,
253 ::std::uint8_t base, bool digit_grouping) {
254 static_assert(::std::numeric_limits<
255 typename ::std::remove_cv<IntegralType>::type>::is_integer,
256 "WriteIntegerToTextStream only supports integer types.");
257 static_assert(
258 !::std::is_same<bool,
259 typename ::std::remove_cv<IntegralType>::type>::value,
260 "WriteIntegerToTextStream only supports integer types.");
261 EMBOSS_CHECK(base == 10 || base == 2 || base == 16);
262 const char *const digits = "0123456789abcdef";
263 const int grouping = base == 10 ? 3 : base == 16 ? 4 : 8;
264 // The maximum size 32-bit number is -2**31, which is:
265 //
266 // -0b10000000_00000000_00000000_00000000 (38 chars)
267 // -2_147_483_648 (14 chars)
268 // -0x8000_0000 (12 chars)
269 //
270 // Likewise, the maximum size 8-bit number is -128, which is:
271 // -0b10000000 (11 chars)
272 // -128 (4 chars)
273 // -0x80 (5 chars)
274 //
275 // Binary with separators is always the longest value: 9 chars per 8 bits,
276 // minus 1 char for the '_' that does not appear at the front of the number,
277 // plus 2 chars for "0b", plus 1 char for '-', plus 1 extra char for the
278 // trailing '\0', which is (sizeof value) * CHAR_BIT * 9 / 8 - 1 + 2 + 1 + 1.
279 const int buffer_size = (sizeof value) * CHAR_BIT * 9 / 8 + 3;
280 char buffer[buffer_size];
281 buffer[buffer_size - 1] = '\0';
282 int next_char = buffer_size - 2;
283 if (value == 0) {
284 EMBOSS_DCHECK_GE(next_char, 0);
285 buffer[next_char] = digits[0];
286 --next_char;
287 }
288 int sign = value < 0 ? -1 : 1;
289 int digit_count = 0;
290 auto buffer_char = [&](char c) {
291 EMBOSS_DCHECK_GE(next_char, 0);
292 buffer[next_char] = c;
293 --next_char;
294 };
295 if (value < 0) {
296 if (value == ::std::numeric_limits<decltype(value)>::lowest()) {
297 // The minimum negative two's-complement value has no corresponding
298 // positive value, so 'value = -value' is not useful in that case.
299 // Instead, we do some trickery to buffer the lowest-order digit here.
300 auto digit = -(value + 1) % base + 1;
301 value = -(value + 1) / base;
302 if (digit == base) {
303 digit = 0;
304 ++value;
305 }
306 buffer_char(digits[digit]);
307 ++digit_count;
308 } else {
309 value = -value;
310 }
311 }
312 while (value > 0) {
313 if (digit_count && digit_count % grouping == 0 && digit_grouping) {
314 buffer_char('_');
315 }
316 buffer_char(digits[value % base]);
317 value /= base;
318 ++digit_count;
319 }
320 if (base == 16) {
321 buffer_char('x');
322 buffer_char('0');
323 } else if (base == 2) {
324 buffer_char('b');
325 buffer_char('0');
326 }
327 if (sign < 0) {
328 buffer_char('-');
329 }
330
331 stream->Write(buffer + 1 + next_char);
332 }
333
334 // Writes an integer value in the base given in options, plus an optional
335 // comment with the same value in a second base. This is used for the common
336 // output format of IntView, UIntView, and BcdView.
337 template <class Stream, class View>
WriteIntegerViewToTextStream(View * view,Stream * stream,const TextOutputOptions & options)338 void WriteIntegerViewToTextStream(View *view, Stream *stream,
339 const TextOutputOptions &options) {
340 WriteIntegerToTextStream(view->Read(), stream, options.numeric_base(),
341 options.digit_grouping());
342 if (options.comments()) {
343 stream->Write(" # ");
344 WriteIntegerToTextStream(view->Read(), stream,
345 options.numeric_base() == 10 ? 16 : 10,
346 options.digit_grouping());
347 }
348 }
349
350 template <class Stream, class View>
ReadBooleanFromTextStream(View * view,Stream * stream)351 bool ReadBooleanFromTextStream(View *view, Stream *stream) {
352 ::std::string token;
353 if (!::emboss::support::ReadToken(stream, &token)) return false;
354 if (token == "true") {
355 return view->TryToWrite(true);
356 } else if (token == "false") {
357 return view->TryToWrite(false);
358 }
359 // TODO(bolms): Provide a way to get an error message on parse failure.
360 return false;
361 }
362
363 // The TextOutputOptions parameter is present so that it can be passed in by
364 // generated code that uses the same form for WriteBooleanViewToTextStream,
365 // WriteIntegerViewToTextStream, and WriteEnumViewToTextStream.
366 template <class Stream, class View>
WriteBooleanViewToTextStream(View * view,Stream * stream,const TextOutputOptions &)367 void WriteBooleanViewToTextStream(View *view, Stream *stream,
368 const TextOutputOptions &) {
369 if (view->Read()) {
370 stream->Write("true");
371 } else {
372 stream->Write("false");
373 }
374 }
375
// FloatConstants<Float> provides, for each supported IEEE754-compatible
// floating-point type, an unsigned integer type of the same size plus bit
// masks for the mantissa, exponent, and sign fields.  They are mostly used
// here to implement the text format for NaNs, preserving the NaN payload so
// that the text format can (in theory) provide a bit-exact round trip.
//
// kPrintfPrecision() is the precision passed to printf-style "%.*g"
// formatting, and kScanfFormat() is the scanf format that reads a value of
// the type and records the number of characters consumed.
template <class Float>
struct FloatConstants;

template <>
struct FloatConstants<float> {
  static_assert(sizeof(float) == 4, "Emboss requires 32-bit float.");
  using MatchingIntegerType = ::std::uint32_t;
  // Bits 0..22: the 23-bit mantissa.
  static constexpr MatchingIntegerType kMantissaMask() {
    return (MatchingIntegerType{1} << 23) - 1;
  }
  // Bits 23..30: the 8-bit exponent.
  static constexpr MatchingIntegerType kExponentMask() {
    return MatchingIntegerType{0xff} << 23;
  }
  // Bit 31: the sign.
  static constexpr MatchingIntegerType kSignMask() {
    return MatchingIntegerType{1} << 31;
  }
  static constexpr int kPrintfPrecision() { return 9; }
  static constexpr const char *kScanfFormat() { return "%f%n"; }
};

template <>
struct FloatConstants<double> {
  static_assert(sizeof(double) == 8, "Emboss requires 64-bit double.");
  using MatchingIntegerType = ::std::uint64_t;
  // Bits 0..51: the 52-bit mantissa.
  static constexpr MatchingIntegerType kMantissaMask() {
    return (MatchingIntegerType{1} << 52) - 1;
  }
  // Bits 52..62: the 11-bit exponent.
  static constexpr MatchingIntegerType kExponentMask() {
    return MatchingIntegerType{0x7ff} << 52;
  }
  // Bit 63: the sign.
  static constexpr MatchingIntegerType kSignMask() {
    return MatchingIntegerType{1} << 63;
  }
  static constexpr int kPrintfPrecision() { return 17; }
  static constexpr const char *kScanfFormat() { return "%lf%n"; }
};
411
412 // Decodes a floating-point number from text.
413 template <class Float>
414 bool DecodeFloat(const ::std::string &token, Float *result) {
415 // The state of the world for reading floating-point values is somewhat better
416 // than the situation for writing them, but there are still a few bits that
417 // are underspecified. This function is the mirror of WriteFloatToTextStream,
418 // below, so it specifically decodes infinities and NaNs in the formats that
419 // Emboss uses.
420 //
421 // Because of the use of scanf here, this function accepts hex floating-point
422 // values (0xh.hhhhpeee) *on some systems*. TODO(bolms): make hex float
423 // support universal.
424
425 using UInt = typename FloatConstants<Float>::MatchingIntegerType;
426
427 if (token.empty()) return false;
428
429 // First, check for negative.
430 bool negative = token[0] == '-';
431
432 // Second, check for NaN.
433 ::std::size_t i = token[0] == '-' || token[0] == '+' ? 1 : 0;
434 if (token.size() >= i + 3 && (token[i] == 'N' || token[i] == 'n') &&
435 (token[i + 1] == 'A' || token[i + 1] == 'a') &&
436 (token[i + 2] == 'N' || token[i + 2] == 'n')) {
437 UInt nan_payload;
438 if (token.size() >= i + 4) {
439 if (token[i + 3] == '(' && token[token.size() - 1] == ')') {
440 if (!DecodeInteger(token.substr(i + 4, token.size() - i - 5),
441 &nan_payload)) {
442 return false;
443 }
444 } else {
445 // NaN may not be followed by trailing characters other than a
446 // ()-enclosed payload.
447 return false;
448 }
449 } else {
450 // If no specific NaN was given, take a default NaN from the C++ standard
451 // library. Technically, a conformant C++ implementation might not have
452 // quiet_NaN(), but any IEEE754-based implementation should.
453 //
454 // It is tempting to just write the default NaN directly into the view and
455 // return success, but "-NaN" should be have its sign bit set, and there
456 // is no direct way to set the sign bit of a NaN, so there are fewer code
457 // paths if we extract the default NaN payload, then use it in the
458 // reconstruction step, below.
459 Float default_nan = ::std::numeric_limits<Float>::quiet_NaN();
460 UInt bits;
461 ::std::memcpy(&bits, &default_nan, sizeof(bits));
462 nan_payload = bits & FloatConstants<Float>::kMantissaMask();
463 }
464 if (nan_payload == 0) {
465 // "NaN" with a payload of zero is actually the bit pattern for infinity;
466 // "NaN(0)" should not be an alias for "Inf".
467 return false;
468 }
469 if (nan_payload & (FloatConstants<Float>::kExponentMask() |
470 FloatConstants<Float>::kSignMask())) {
471 // The payload must be small enough to fit in the payload space; it must
472 // not overflow into the exponent or sign bits.
473 //
474 // Note that the DecodeInteger call which decoded the payload will return
475 // false if the payload would overflow the `UInt` type, so cases like
476 // "NaN(0x10000000000000000000000000000)" -- which are so big that they no
477 // longer interfere with the sign or exponent -- are caught above.
478 return false;
479 }
480 UInt bits = FloatConstants<Float>::kExponentMask();
481 bits |= nan_payload;
482 if (negative) {
483 bits |= FloatConstants<Float>::kSignMask();
484 }
485 ::std::memcpy(result, &bits, sizeof(bits));
486 return true;
487 }
488
489 // If the value is not NaN, check for infinity.
490 if (token.size() >= i + 3 && (token[i] == 'I' || token[i] == 'i') &&
491 (token[i + 1] == 'N' || token[i + 1] == 'n') &&
492 (token[i + 2] == 'F' || token[i + 2] == 'f')) {
493 if (token.size() > i + 3) {
494 // Infinity must be exactly "Inf" or "-Inf" (case insensitive). There
495 // must not be trailing characters.
496 return false;
497 }
498 // As with quiet_NaN(), a conforming C++ implementation might not have
499 // infinity(), but an IEEE 754-based implementation should.
500 if (negative) {
501 *result = -::std::numeric_limits<Float>::infinity();
502 return true;
503 } else {
504 *result = ::std::numeric_limits<Float>::infinity();
505 return true;
506 }
507 }
508
509 // For non-NaN, non-Inf values, use the C scanf function, mirroring the use of
510 // printf for writing the value, below.
511 int chars_used = -1;
512 if (::std::sscanf(token.c_str(), FloatConstants<Float>::kScanfFormat(),
513 result, &chars_used) < 1) {
514 return false;
515 }
516 if (chars_used < 0 ||
517 static_cast</**/ ::std::size_t>(chars_used) < token.size()) {
518 return false;
519 }
520 return true;
521 }
522
// Reads one token from `stream`, decodes it with DecodeFloat() as a
// View::ValueType, and tries to store the value through view->TryToWrite().
//
// Returns false if the token cannot be read or decoded; otherwise returns the
// result of TryToWrite().
template <class Stream, class View>
bool ReadFloatFromTextStream(View *view, Stream *stream) {
  ::std::string token;
  typename View::ValueType value;
  if (!ReadToken(stream, &token) || !DecodeFloat(token, &value)) {
    return false;
  }
  return view->TryToWrite(value);
}
533
534 template <class Stream, class Float>
535 void WriteFloatToTextStream(Float n, Stream *stream,
536 const TextOutputOptions &options) {
537 static_assert(::std::is_same<Float, float>::value ||
538 ::std::is_same<Float, double>::value,
539 "WriteFloatToTextStream can only write float or double.");
540 // The state of the world w.r.t. rendering floating-points as decimal text is,
541 // ca. 2018, less than ideal.
542 //
543 // In C++ land, there is actually no stable facility in the standard library
544 // until to_chars() in C++17 -- which is not actually implemented yet in
545 // libc++. to_string(), the printf() family, and the iostreams system all
546 // respect the current locale. In most programs, the locale is permanently
547 // left on "C", but this is not guaranteed. to_string() also uses a fixed and
548 // rather unfortunate format.
549 //
550 // For integers, I (bolms@) chose to just implement custom read and write
551 // routines, but those routines are quite small and straightforward compared
552 // to floating point conversion. Even writing correct output is difficult,
553 // and writing correct and minimal output is the subject of a number of
554 // academic papers.
555 //
556 // For the moment, I'm just using snprintf("%.*g", 17, n), which is guaranteed
557 // to be read back as the same number, but can be longer than strictly
558 // necessary.
559 //
560 // TODO(bolms): Import a modified version of the double-to-string conversion
561 // from Swift's standard library, which appears to be best implementation
562 // currently available.
563
564 if (::std::isnan(n)) {
565 // The printf format for NaN is just "NaN". In the interests of keeping
566 // things bit-exact, Emboss prints the exact NaN.
567 typename FloatConstants<Float>::MatchingIntegerType bits;
568 ::std::memcpy(&bits, &n, sizeof(bits));
569 ::std::uint64_t nan_payload = bits & FloatConstants<Float>::kMantissaMask();
570 ::std::uint64_t nan_sign = bits & FloatConstants<Float>::kSignMask();
571 if (nan_sign) {
572 // NaN still has a sign bit, which is generally treated differently from
573 // the payload. There is no real "standard" text format for NaNs, but
574 // "-NaN" appears to be a common way of indicating a NaN with the sign bit
575 // set.
576 stream->Write("-NaN(");
577 } else {
578 stream->Write("NaN(");
579 }
580 // NaN payloads are always dumped in hex. Note that Emboss is treating the
581 // is_quiet/is_signal bit as just another bit in the payload.
582 WriteIntegerToTextStream(nan_payload, stream, 16, options.digit_grouping());
583 stream->Write(")");
584 return;
585 }
586
587 if (::std::isinf(n)) {
588 if (n < 0.0) {
589 stream->Write("-Inf");
590 } else {
591 stream->Write("Inf");
592 }
593 return;
594 }
595
596 // TODO(bolms): Should the current numeric base be honored here? Should there
597 // be a separate Float numeric base?
598 ::std::array<char, 30> buffer;
599 // TODO(bolms): Figure out how to get ::std::snprintf to work on
600 // microcontroller builds.
601 ::std::size_t snprintf_result = static_cast</**/ ::std::size_t>(::snprintf(
602 &(buffer[0]), buffer.size(), "%.*g",
603 FloatConstants<Float>::kPrintfPrecision(), static_cast<double>(n)));
604 (void)snprintf_result; // Unused if EMBOSS_CHECK_LE is compiled out.
605 EMBOSS_CHECK_LE(snprintf_result, buffer.size());
606 stream->Write(&buffer[0]);
607
608 // TODO(bolms): Support digit grouping.
609 }
610
611 template <class Stream, class View>
612 bool ReadEnumViewFromTextStream(View *view, Stream *stream) {
613 ::std::string token;
614 if (!ReadToken(stream, &token)) return false;
615 if (token.empty()) return false;
616 if (::std::isdigit(token[0])) {
617 ::std::uint64_t value;
618 if (!DecodeInteger(token, &value)) return false;
619 // TODO(bolms): Fix the static_cast<ValueType> for signed ValueType.
620 // TODO(bolms): Should values between 2**63 and 2**64-1 actually be
621 // allowed in the text format when ValueType is signed?
622 return view->TryToWrite(static_cast<typename View::ValueType>(value));
623 } else if (token[0] == '-') {
624 ::std::int64_t value;
625 if (!DecodeInteger(token, &value)) return false;
626 return view->TryToWrite(static_cast<typename View::ValueType>(value));
627 } else {
628 typename View::ValueType value;
629 if (!TryToGetEnumFromName(token.c_str(), &value)) return false;
630 return view->TryToWrite(value);
631 }
632 }
633
634 template <class Stream, class View>
635 void WriteEnumViewToTextStream(View *view, Stream *stream,
636 const TextOutputOptions &options) {
637 const char *name = TryToGetNameFromEnum(view->Read());
638 if (name != nullptr) {
639 stream->Write(name);
640 }
641 // If the enum value has no known name, then write its numeric value
642 // instead. If it does have a known name, and comments are enabled on the
643 // output, then write the numeric value as a comment.
644 if (name == nullptr || options.comments()) {
645 if (name != nullptr) stream->Write(" # ");
646 WriteIntegerToTextStream(
647 static_cast<
648 typename ::std::underlying_type<typename View::ValueType>::type>(
649 view->Read()),
650 stream, options.numeric_base(), options.digit_grouping());
651 }
652 }
653
654 // Updates an array from a text stream. For an array of integers, the most
655 // basic form of the text format looks like:
656 //
657 // { 0, 1, 2 }
658 //
659 // However, the following are all acceptable and equivalent:
660 //
661 // { 0, 1, 2, }
662 // {0 1 2}
663 // { [2]: 2, [1]: 1, [0]: 0 }
664 // {[2]:2, [0]:0, 1}
665 //
666 // Formally, the array must be contained within braces ("{}"). Elements are
667 // represented as an optional index surrounded by brackets ("[]") followed by
668 // the text format of the element, followed by a single optional comma (",").
669 // If no index is present for the first element, the index 0 will be used. If
670 // no index is present for any elements after the first, the index one greater
671 // than the previous index will be used.
672 template <class Array, class Stream>
673 bool ReadArrayFromTextStream(Array *array, Stream *stream) {
674 // The text format allows any given index to be set more than once. In
675 // theory, this function could track indices and fail if an index were
676 // double-set, but doing so would require quite a bit of overhead, and
677 // O(array->ElementCount()) extra space in the worst case. It does not seem
678 // worth it to impose the runtime cost here.
679 ::std::size_t index = 0;
680 ::std::string brace;
681 // Read out the opening brace.
682 if (!ReadToken(stream, &brace)) return false;
683 if (brace != "{") return false;
684 for (;;) {
685 char c;
686 // Check for a closing brace; if present, success.
687 if (!DiscardWhitespace(stream)) return false;
688 if (!stream->Read(&c)) return false;
689 if (c == '}') return true;
690
691 // If the element has an index, read it.
692 if (c == '[') {
693 ::std::string index_text;
694 if (!ReadToken(stream, &index_text)) return false;
695 if (!::emboss::support::DecodeInteger(index_text, &index)) return false;
696 ::std::string closing_bracket;
697 if (!ReadToken(stream, &closing_bracket)) return false;
698 if (closing_bracket != "]") return false;
699 ::std::string colon;
700 if (!ReadToken(stream, &colon)) return false;
701 if (colon != ":") return false;
702 } else {
703 if (!stream->Unread(c)) return false;
704 }
705
706 // Read the element.
707 if (index >= array->ElementCount()) return false;
708 if (!(*array)[index].UpdateFromTextStream(stream)) return false;
709 ++index;
710
711 // If there is a trailing comma, discard it.
712 if (!DiscardWhitespace(stream)) return false;
713 if (!stream->Read(&c)) return false;
714 if (c != ',') {
715 if (c != '}') return false;
716 if (!stream->Unread(c)) return false;
717 }
718 }
719 }
720
721 // Prints out the elements of an 8-bit Int or UInt array as characters.
722 template <class Array, class Stream>
723 void WriteShorthandAsciiArrayCommentToTextStream(
724 const Array *array, Stream *stream, const TextOutputOptions &options) {
725 if (!options.multiline()) return;
726 if (!options.comments()) return;
727 if (array->ElementCount() == 0) return;
728 static constexpr int kCharsPerBlock = 64;
729 static constexpr char kStandInForNonPrintableChar = '.';
730 auto start_new_line = [&]() {
731 stream->Write("\n");
732 stream->Write(options.current_indent());
733 stream->Write("# ");
734 };
735 for (int i = 0, n = array->ElementCount(); i < n; ++i) {
736 const int c = (*array)[i].Read();
737 const bool c_is_printable = (c >= 32 && c <= 126);
738 const bool starting_new_block = ((i % kCharsPerBlock) == 0);
739 if (starting_new_block) start_new_line();
740 stream->Write(c_is_printable ? static_cast<char>(c)
741 : kStandInForNonPrintableChar);
742 }
743 }
744
745 // Writes an array to a text stream. This writes the array in a format
746 // compatible with ReadArrayFromTextStream, above. For multiline output, writes
747 // one element per line.
748 //
749 // TODO(bolms): Make the output for arrays of small elements (like bytes) much
750 // more compact.
751 //
752 // This will require several support functions like `MaxTextLength` on every
753 // view type, and will substantially increase the number of tests required for
754 // this function, but will make arrays of small elements much more readable.
755 template <class Array, class Stream>
756 void WriteArrayToTextStream(Array *array, Stream *stream,
757 const TextOutputOptions &options) {
758 TextOutputOptions element_options = options.PlusOneIndent();
759 if (options.multiline()) {
760 stream->Write("{");
761 WriteShorthandArrayCommentToTextStream(array, stream, element_options);
762 for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
763 if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
764 (*array)[i].Ok()) {
765 stream->Write("\n");
766 stream->Write(element_options.current_indent());
767 stream->Write("[");
768 // TODO(bolms): Put padding in here so that array elements start at the
769 // same column.
770 //
771 // TODO(bolms): (Maybe) figure out how to get padding to work so that
772 // elements with comments can have their comments align to the same
773 // column.
774 WriteIntegerToTextStream(i, stream, options.numeric_base(),
775 options.digit_grouping());
776 stream->Write("]: ");
777 (*array)[i].WriteToTextStream(stream, element_options);
778 } else if (element_options.comments()) {
779 stream->Write("\n");
780 stream->Write(element_options.current_indent());
781 stream->Write("# [");
782 WriteIntegerToTextStream(i, stream, options.numeric_base(),
783 options.digit_grouping());
784 stream->Write("]: UNREADABLE");
785 }
786 }
787 stream->Write("\n");
788 stream->Write(options.current_indent());
789 stream->Write("}");
790 } else {
791 stream->Write("{");
792 bool skipped_unreadable = false;
793 for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
794 if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
795 (*array)[i].Ok()) {
796 stream->Write(" ");
797 if (i % 8 == 0 || skipped_unreadable) {
798 stream->Write("[");
799 WriteIntegerToTextStream(i, stream, options.numeric_base(),
800 options.digit_grouping());
801 stream->Write("]: ");
802 }
803 (*array)[i].WriteToTextStream(stream, element_options);
804 if (i < array->ElementCount() - 1) {
805 stream->Write(",");
806 }
807 skipped_unreadable = false;
808 } else {
809 if (element_options.comments()) {
810 stream->Write(" # ");
811 if (i % 8 == 0) {
812 stream->Write("[");
813 WriteIntegerToTextStream(i, stream, options.numeric_base(),
814 options.digit_grouping());
815 stream->Write("]: ");
816 }
817 stream->Write("UNREADABLE\n");
818 }
819 skipped_unreadable = true;
820 }
821 }
822 stream->Write(" }");
823 }
824 }
825
// TextStream puts a stream-like interface (Read() / Unread()) onto a character
// sequence, for use by UpdateFromTextStream.  It is used by UpdateFromText().
//
// TextStream stores only a pointer and a length; it does not copy the text.
// The caller must keep the underlying characters alive and unchanged for as
// long as the TextStream is in use.
class TextStream final {
 public:
  // This template handles std::string, std::string_view, and
  // absl::string_view: anything with data() and size().
  template <class String>
  inline explicit TextStream(const String &text)
      : text_(text.data()), length_(text.size()) {}

  // Wraps a NUL-terminated C string.  `text` must not be null.
  inline explicit TextStream(const char *text)
      : text_(text), length_(::std::strlen(text)) {}

  // Wraps the `length` characters starting at `text`.
  inline TextStream(const char *text, ::std::size_t length)
      : text_(text), length_(length) {}

  // Stores the next character in `*result` and advances.  Returns false,
  // leaving `*result` untouched, at the end of the text.
  inline bool Read(char *result) {
    if (index_ >= length_) return false;
    *result = text_[index_];
    ++index_;
    return true;
  }

  // Steps back one character.  Fails, without moving, at the start of the
  // text or if `c` is not the character that was most recently read.
  inline bool Unread(char c) {
    if (index_ < 1) return false;
    if (text_[index_ - 1] != c) return false;
    --index_;
    return true;
  }

 private:
  // It would be nice to use string_view here, but that's not available until
  // C++17.
  const char *text_ = nullptr;
  ::std::size_t length_ = 0;
  ::std::size_t index_ = 0;
};
862
863 } // namespace support
864
865 // Returns a TextOutputOptions set for reasonable multi-line text output.
866 static inline TextOutputOptions MultilineText() {
867 return TextOutputOptions()
868 .Multiline(true)
869 .WithIndent(" ")
870 .WithComments(true)
871 .WithDigitGrouping(true);
872 }
873
874 // TODO(bolms): Add corresponding ReadFromText*() verbs which enforce the
875 // constraint that all of a field's dependencies must be present in the text
876 // before the field itself is set.
877 template <typename EmbossViewType>
878 inline bool UpdateFromText(const EmbossViewType &view,
879 const ::std::string &text) {
880 auto text_stream = support::TextStream{text};
881 return view.UpdateFromTextStream(&text_stream);
882 }
883
884 template <typename EmbossViewType>
885 inline ::std::string WriteToString(const EmbossViewType &view,
886 TextOutputOptions options) {
887 support::TextOutputStream text_stream;
888 view.WriteToTextStream(&text_stream, options);
889 return text_stream.Result();
890 }
891
892 template <typename EmbossViewType>
893 inline ::std::string WriteToString(const EmbossViewType &view) {
894 return WriteToString(view, TextOutputOptions());
895 }
896
897 } // namespace emboss
898
899 #endif // EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
900