runtime/cpp/emboss_text_util.h

*99e0aae7SDavid Rees// Copyright 2019 Google LLC
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// Licensed under the Apache License, Version 2.0 (the "License");
*99e0aae7SDavid Rees// you may not use this file except in compliance with the License.
*99e0aae7SDavid Rees// You may obtain a copy of the License at
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees//     https://www.apache.org/licenses/LICENSE-2.0
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// Unless required by applicable law or agreed to in writing, software
*99e0aae7SDavid Rees// distributed under the License is distributed on an "AS IS" BASIS,
*99e0aae7SDavid Rees// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*99e0aae7SDavid Rees// See the License for the specific language governing permissions and
*99e0aae7SDavid Rees// limitations under the License.
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// This header contains functionality related to Emboss text output.
*99e0aae7SDavid Rees#ifndef EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
*99e0aae7SDavid Rees#define EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
*99e0aae7SDavid Rees
#include <array>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees#include "runtime/cpp/emboss_defines.h"
*99e0aae7SDavid Rees
*99e0aae7SDavid Reesnamespace emboss {
*99e0aae7SDavid Rees
// TextOutputOptions are used to configure text output.  Typically, one can just
// use a default TextOutputOptions() (for compact output) or MultilineText()
// (for reasonable formatted output).
//
// A default-constructed object has empty indent strings, every boolean option
// off, and a numeric base of 10.  The object is used as an immutable value:
// each With*() / Multiline() / PlusOneIndent() call leaves `*this` untouched
// and returns a modified copy, so calls can be chained.
class TextOutputOptions final {
 public:
  TextOutputOptions() = default;

  // Returns a copy whose current_indent() has one more indent() appended.
  TextOutputOptions PlusOneIndent() const {
    TextOutputOptions result = *this;
    result.current_indent_ += indent_;
    return result;
  }

  // Returns a copy with the multiline flag set to new_value.
  TextOutputOptions Multiline(bool new_value) const {
    TextOutputOptions result = *this;
    result.multiline_ = new_value;
    return result;
  }

  // Returns a copy that uses new_value as the string appended by each
  // PlusOneIndent().  The accumulated current_indent() is not changed.
  TextOutputOptions WithIndent(::std::string new_value) const {
    TextOutputOptions result = *this;
    result.indent_ = ::std::move(new_value);
    return result;
  }

  // Returns a copy with the comments flag set to new_value.
  TextOutputOptions WithComments(bool new_value) const {
    TextOutputOptions result = *this;
    result.comments_ = new_value;
    return result;
  }

  // Returns a copy with the digit grouping flag set to new_value.
  TextOutputOptions WithDigitGrouping(bool new_value) const {
    TextOutputOptions result = *this;
    result.digit_grouping_ = new_value;
    return result;
  }

  // Returns a copy with the numeric base set to new_value.  The value is
  // stored as given; it is not validated here.
  TextOutputOptions WithNumericBase(::std::uint8_t new_value) const {
    TextOutputOptions result = *this;
    result.numeric_base_ = new_value;
    return result;
  }

  // Returns a copy with the allow-partial-output flag set to new_value.
  TextOutputOptions WithAllowPartialOutput(bool new_value) const {
    TextOutputOptions result = *this;
    result.allow_partial_output_ = new_value;
    return result;
  }

  // Accessors.  The string accessors return by value, so the result stays
  // valid even when it is read from a temporary TextOutputOptions.
  ::std::string current_indent() const { return current_indent_; }
  ::std::string indent() const { return indent_; }
  bool multiline() const { return multiline_; }
  bool digit_grouping() const { return digit_grouping_; }
  bool comments() const { return comments_; }
  ::std::uint8_t numeric_base() const { return numeric_base_; }
  bool allow_partial_output() const { return allow_partial_output_; }

 private:
  ::std::string indent_;          // Appended once per PlusOneIndent().
  ::std::string current_indent_;  // Indentation accumulated so far.
  bool comments_ = false;
  bool multiline_ = false;
  bool digit_grouping_ = false;
  bool allow_partial_output_ = false;
  ::std::uint8_t numeric_base_ = 10;
};
*99e0aae7SDavid Rees
*99e0aae7SDavid Reesnamespace support {
*99e0aae7SDavid Rees
// TextOutputStream puts a stream-like interface onto a std::string, for use by
// DumpToTextStream.  Everything passed to Write() is appended, in order, to an
// internal buffer; Result() returns a copy of the text accumulated so far.
class TextOutputStream final {
 public:
  inline explicit TextOutputStream() = default;

  // Appends the full contents of `text`, including any embedded '\0' bytes.
  inline void Write(const ::std::string &text) {
    text_.write(text.data(), text.size());
  }

  // Appends a C-style string.  `text` must be non-null and null-terminated.
  inline void Write(const char *text) {
    text_.write(text, ::std::strlen(text));
  }

  // Appends a single character.
  inline void Write(const char c) { text_.put(c); }

  // Returns everything written so far.  Const, so that the accumulated text
  // can also be read through a const reference to the stream.
  inline ::std::string Result() const { return text_.str(); }

 private:
  ::std::ostringstream text_;
};
*99e0aae7SDavid Rees
// DecodeInteger decodes an integer from a string.  This is very similar to the
// many, many existing integer decode routines in the world, except that a) it
// accepts integers in any Emboss format, and b) it can run in environments that
// do not support std::istream or Google's number conversion routines.
//
// Accepted syntax:
//   * an optional leading '-' (signed IntType only);
//   * an optional "0x"/"0X" (base 16) or "0b"/"0B" (base 2) prefix; without a
//     prefix the number is base 10;
//   * one or more digits valid for the base, optionally interspersed with '_'
//     separators.  A '_' as the very first character of the text is rejected.
//
// Returns true and stores the value in *result on success.  Returns false,
// leaving *result untouched, if the text is malformed, contains no digits, or
// names a value that does not fit in IntType.
//
// Ideally, this would be replaced by someone else's code.
template <class IntType>
bool DecodeInteger(const ::std::string &text, IntType *result) {
  IntType accumulator = 0;
  IntType base = 10;
  bool negative = false;
  ::std::size_t offset = 0;
  if (::std::is_signed<IntType>::value && !text.empty() && text[0] == '-') {
    negative = true;
    offset = 1;
  }
  if (text.size() >= offset + 2 && text[offset] == '0') {
    const char marker = text[offset + 1];
    if (marker == 'x' || marker == 'X') {
      base = 16;
      offset += 2;
    } else if (marker == 'b' || marker == 'B') {
      base = 2;
      offset += 2;
    }
  }
  // "", "0x", "0b", "-", "-0x", and "-0b" are not valid numbers, and neither
  // are strings such as "0x_" or "-_" that contain only separators after the
  // sign and prefix: at least one real digit is required.
  bool saw_digit = false;
  for (; offset < text.size(); ++offset) {
    const char c = text[offset];
    IntType digit = 0;
    if (c == '_') {
      if (offset == 0) {
        return false;
      }
      continue;
    } else if (c >= '0' && c <= '9') {
      digit = static_cast<IntType>(c - '0');
    } else if (c >= 'A' && c <= 'F') {
      digit = static_cast<IntType>(c - 'A' + 10);
    } else if (c >= 'a' && c <= 'f') {
      digit = static_cast<IntType>(c - 'a' + 10);
    } else {
      return false;
    }
    if (digit >= base) {
      return false;
    }
    saw_digit = true;
    // Negative numbers are accumulated downwards so that the most negative
    // value of IntType can be decoded.  Each bounds check runs before the
    // multiply so that the accumulator itself can never overflow.
    if (negative) {
      if (accumulator <
          (::std::numeric_limits<IntType>::min() + digit) / base) {
        return false;
      }
      accumulator = static_cast<IntType>(accumulator * base - digit);
    } else {
      if (accumulator >
          (::std::numeric_limits<IntType>::max() - digit) / base) {
        return false;
      }
      accumulator = static_cast<IntType>(accumulator * base + digit);
    }
  }
  if (!saw_digit) return false;
  *result = accumulator;
  return true;
}
*99e0aae7SDavid Rees
// Consumes spaces, tabs, line endings, and '#' comments (which run up to the
// next '\r' or '\n') from the front of the stream.
//
// Returns true if the end of the stream is reached while skipping.  Otherwise,
// the first character that is neither whitespace nor part of a comment is
// handed back via stream->Unread(), and the result of that call is returned.
template <class Stream>
bool DiscardWhitespace(Stream *stream) {
  bool inside_comment = false;
  for (;;) {
    char ch;
    if (!stream->Read(&ch)) return true;
    if (ch == '#') inside_comment = true;
    const bool is_line_end = (ch == '\r' || ch == '\n');
    if (is_line_end) inside_comment = false;
    const bool is_blank = is_line_end || ch == ' ' || ch == '\t';
    if (!inside_comment && !is_blank) {
      return stream->Unread(ch);
    }
  }
}
*99e0aae7SDavid Rees
// Reads the next token from the stream into *token, after skipping leading
// whitespace and comments with DiscardWhitespace().
//
// A token is either a single punctuation character (one of ":{}[],") or a
// maximal run of characters that ends just before whitespace, '#',
// punctuation, or the end of the stream.  At end of stream, *token is set to
// the empty string and true is returned.
//
// Returns false, without assigning *token, if whitespace could not be
// discarded or if the character that ended the token could not be unread.
//
// NOTE(review): strchr() also matches the terminating '\0' of the punctuation
// list, so a NUL character in the input is classified as punctuation.  That
// behavior is preserved here; confirm whether it is intended.
template <class Stream>
bool ReadToken(Stream *stream, ::std::string *token) {
  if (!DiscardWhitespace(stream)) return false;

  char c;
  if (!stream->Read(&c)) {
    token->clear();
    return true;
  }

  const char *const punctuation = ":{}[],";
  if (strchr(punctuation, c) != nullptr) {
    token->assign(1, c);
    return true;
  }

  // TODO(bolms): Only allow alphanumeric characters here?
  ::std::string text;
  for (;;) {
    text.push_back(c);
    if (!stream->Read(&c)) {
      // End of stream also ends the token; there is nothing to unread.
      token->swap(text);
      return true;
    }
    const bool ends_token = c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                            c == '#' || strchr(punctuation, c) != nullptr;
    if (ends_token) break;
  }
  // The terminating character belongs to whatever follows this token.
  if (!stream->Unread(c)) return false;
  token->swap(text);
  return true;
}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesbool ReadIntegerFromTextStream(View *view, Stream *stream) {
*99e0aae7SDavid Rees  ::std::string token;
*99e0aae7SDavid Rees  if (!::emboss::support::ReadToken(stream, &token)) return false;
*99e0aae7SDavid Rees  if (token.empty()) return false;
*99e0aae7SDavid Rees  typename View::ValueType value;
*99e0aae7SDavid Rees  if (!::emboss::support::DecodeInteger(token, &value)) return false;
*99e0aae7SDavid Rees  return view->TryToWrite(value);
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// WriteIntegerToTextStream encodes the given value in base 2, 10, or 16, with
*99e0aae7SDavid Rees// or without digit group separators ('_'), and then calls stream->Write() with
*99e0aae7SDavid Rees// a char * argument that is a C-style null-terminated string of the encoded
*99e0aae7SDavid Rees// number.
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// As with DecodeInteger, above, it would be nice to be able to replace this
*99e0aae7SDavid Rees// with someone else's code, but I (bolms@) was unable to find anything in
*99e0aae7SDavid Rees// standard C++ that would encode numbers in binary, nothing that would add
*99e0aae7SDavid Rees// digit separators to hex numbers, and nothing that would use '_' for digit
*99e0aae7SDavid Rees// separators.
*99e0aae7SDavid Reestemplate <class Stream, typename IntegralType>
*99e0aae7SDavid Reesvoid WriteIntegerToTextStream(IntegralType value, Stream *stream,
*99e0aae7SDavid Rees                              ::std::uint8_t base, bool digit_grouping) {
*99e0aae7SDavid Rees  static_assert(::std::numeric_limits<
*99e0aae7SDavid Rees                    typename ::std::remove_cv<IntegralType>::type>::is_integer,
*99e0aae7SDavid Rees                "WriteIntegerToTextStream only supports integer types.");
*99e0aae7SDavid Rees  static_assert(
*99e0aae7SDavid Rees      !::std::is_same<bool,
*99e0aae7SDavid Rees                      typename ::std::remove_cv<IntegralType>::type>::value,
*99e0aae7SDavid Rees      "WriteIntegerToTextStream only supports integer types.");
*99e0aae7SDavid Rees  EMBOSS_CHECK(base == 10 || base == 2 || base == 16);
*99e0aae7SDavid Rees  const char *const digits = "0123456789abcdef";
*99e0aae7SDavid Rees  const int grouping = base == 10 ? 3 : base == 16 ? 4 : 8;
*99e0aae7SDavid Rees  // The maximum size 32-bit number is -2**31, which is:
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // -0b10000000_00000000_00000000_00000000  (38 chars)
*99e0aae7SDavid Rees  // -2_147_483_648  (14 chars)
*99e0aae7SDavid Rees  // -0x8000_0000  (12 chars)
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // Likewise, the maximum size 8-bit number is -128, which is:
*99e0aae7SDavid Rees  // -0b10000000  (11 chars)
*99e0aae7SDavid Rees  // -128  (4 chars)
*99e0aae7SDavid Rees  // -0x80  (5 chars)
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // Binary with separators is always the longest value: 9 chars per 8 bits,
*99e0aae7SDavid Rees  // minus 1 char for the '_' that does not appear at the front of the number,
*99e0aae7SDavid Rees  // plus 2 chars for "0b", plus 1 char for '-', plus 1 extra char for the
*99e0aae7SDavid Rees  // trailing '\0', which is (sizeof value) * CHAR_BIT * 9 / 8 - 1 + 2 + 1 + 1.
*99e0aae7SDavid Rees  const int buffer_size = (sizeof value) * CHAR_BIT * 9 / 8 + 3;
*99e0aae7SDavid Rees  char buffer[buffer_size];
*99e0aae7SDavid Rees  buffer[buffer_size - 1] = '\0';
*99e0aae7SDavid Rees  int next_char = buffer_size - 2;
*99e0aae7SDavid Rees  if (value == 0) {
*99e0aae7SDavid Rees    EMBOSS_DCHECK_GE(next_char, 0);
*99e0aae7SDavid Rees    buffer[next_char] = digits[0];
*99e0aae7SDavid Rees    --next_char;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  int sign = value < 0 ? -1 : 1;
*99e0aae7SDavid Rees  int digit_count = 0;
*99e0aae7SDavid Rees  auto buffer_char = [&](char c) {
*99e0aae7SDavid Rees    EMBOSS_DCHECK_GE(next_char, 0);
*99e0aae7SDavid Rees    buffer[next_char] = c;
*99e0aae7SDavid Rees    --next_char;
*99e0aae7SDavid Rees  };
*99e0aae7SDavid Rees  if (value < 0) {
*99e0aae7SDavid Rees    if (value == ::std::numeric_limits<decltype(value)>::lowest()) {
*99e0aae7SDavid Rees      // The minimum negative two's-complement value has no corresponding
*99e0aae7SDavid Rees      // positive value, so 'value = -value' is not useful in that case.
*99e0aae7SDavid Rees      // Instead, we do some trickery to buffer the lowest-order digit here.
*99e0aae7SDavid Rees      auto digit = -(value + 1) % base + 1;
*99e0aae7SDavid Rees      value = -(value + 1) / base;
*99e0aae7SDavid Rees      if (digit == base) {
*99e0aae7SDavid Rees        digit = 0;
*99e0aae7SDavid Rees        ++value;
*99e0aae7SDavid Rees      }
*99e0aae7SDavid Rees      buffer_char(digits[digit]);
*99e0aae7SDavid Rees      ++digit_count;
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      value = -value;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  while (value > 0) {
*99e0aae7SDavid Rees    if (digit_count && digit_count % grouping == 0 && digit_grouping) {
*99e0aae7SDavid Rees      buffer_char('_');
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    buffer_char(digits[value % base]);
*99e0aae7SDavid Rees    value /= base;
*99e0aae7SDavid Rees    ++digit_count;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  if (base == 16) {
*99e0aae7SDavid Rees    buffer_char('x');
*99e0aae7SDavid Rees    buffer_char('0');
*99e0aae7SDavid Rees  } else if (base == 2) {
*99e0aae7SDavid Rees    buffer_char('b');
*99e0aae7SDavid Rees    buffer_char('0');
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  if (sign < 0) {
*99e0aae7SDavid Rees    buffer_char('-');
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  stream->Write(buffer + 1 + next_char);
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Writes an integer value in the base given in options, plus an optional
*99e0aae7SDavid Rees// comment with the same value in a second base.  This is used for the common
*99e0aae7SDavid Rees// output format of IntView, UIntView, and BcdView.
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesvoid WriteIntegerViewToTextStream(View *view, Stream *stream,
*99e0aae7SDavid Rees                                  const TextOutputOptions &options) {
*99e0aae7SDavid Rees  WriteIntegerToTextStream(view->Read(), stream, options.numeric_base(),
*99e0aae7SDavid Rees                           options.digit_grouping());
*99e0aae7SDavid Rees  if (options.comments()) {
*99e0aae7SDavid Rees    stream->Write("  # ");
*99e0aae7SDavid Rees    WriteIntegerToTextStream(view->Read(), stream,
*99e0aae7SDavid Rees                             options.numeric_base() == 10 ? 16 : 10,
*99e0aae7SDavid Rees                             options.digit_grouping());
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesbool ReadBooleanFromTextStream(View *view, Stream *stream) {
*99e0aae7SDavid Rees  ::std::string token;
*99e0aae7SDavid Rees  if (!::emboss::support::ReadToken(stream, &token)) return false;
*99e0aae7SDavid Rees  if (token == "true") {
*99e0aae7SDavid Rees    return view->TryToWrite(true);
*99e0aae7SDavid Rees  } else if (token == "false") {
*99e0aae7SDavid Rees    return view->TryToWrite(false);
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  // TODO(bolms): Provide a way to get an error message on parse failure.
*99e0aae7SDavid Rees  return false;
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// The TextOutputOptions parameter is present so that it can be passed in by
*99e0aae7SDavid Rees// generated code that uses the same form for WriteBooleanViewToTextStream,
*99e0aae7SDavid Rees// WriteIntegerViewToTextStream, and WriteEnumViewToTextStream.
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesvoid WriteBooleanViewToTextStream(View *view, Stream *stream,
*99e0aae7SDavid Rees                                  const TextOutputOptions &) {
*99e0aae7SDavid Rees  if (view->Read()) {
*99e0aae7SDavid Rees    stream->Write("true");
*99e0aae7SDavid Rees  } else {
*99e0aae7SDavid Rees    stream->Write("false");
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
// FloatConstants holds various masks for working with IEEE754-compatible
// floating-point values at a bit level.  These are mostly used here to
// implement text format for NaNs, preserving the NaN payload so that the text
// format can (in theory) provide a bit-exact round-trip through the text
// format.
//
// Each specialization provides:
//   MatchingIntegerType  an unsigned integer type of the same size as Float;
//   kSignMask()          the sign bit;
//   kExponentMask()      the exponent bits;
//   kMantissaMask()      the mantissa (NaN payload) bits;
//   kPrintfPrecision()   the precision passed to printf-style "%.*g" output;
//   kScanfFormat()       a scanf format that reads a Float and then stores
//                        the number of characters consumed (via "%n").
//
// Only float and double are specialized; the primary template is left
// undefined.
template <class Float>
struct FloatConstants;

// 32-bit float: 1 sign bit, 8 exponent bits, 23 mantissa bits.
template <>
struct FloatConstants<float> {
  static_assert(sizeof(float) == 4, "Emboss requires 32-bit float.");
  using MatchingIntegerType = ::std::uint32_t;
  static constexpr MatchingIntegerType kSignMask() { return 0x80000000U; }
  static constexpr MatchingIntegerType kExponentMask() { return 0x7f800000U; }
  static constexpr MatchingIntegerType kMantissaMask() { return 0x007fffffU; }
  static constexpr const char *kScanfFormat() { return "%f%n"; }
  static constexpr int kPrintfPrecision() { return 9; }
};

// 64-bit double: 1 sign bit, 11 exponent bits, 52 mantissa bits.
template <>
struct FloatConstants<double> {
  static_assert(sizeof(double) == 8, "Emboss requires 64-bit double.");
  using MatchingIntegerType = ::std::uint64_t;
  static constexpr MatchingIntegerType kSignMask() {
    return 0x8000000000000000ULL;
  }
  static constexpr MatchingIntegerType kExponentMask() {
    return 0x7ff0000000000000ULL;
  }
  static constexpr MatchingIntegerType kMantissaMask() {
    return 0x000fffffffffffffULL;
  }
  static constexpr const char *kScanfFormat() { return "%lf%n"; }
  static constexpr int kPrintfPrecision() { return 17; }
};
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Decodes a floating-point number from text.
*99e0aae7SDavid Reestemplate <class Float>
*99e0aae7SDavid Reesbool DecodeFloat(const ::std::string &token, Float *result) {
*99e0aae7SDavid Rees  // The state of the world for reading floating-point values is somewhat better
*99e0aae7SDavid Rees  // than the situation for writing them, but there are still a few bits that
*99e0aae7SDavid Rees  // are underspecified.  This function is the mirror of WriteFloatToTextStream,
*99e0aae7SDavid Rees  // below, so it specifically decodes infinities and NaNs in the formats that
*99e0aae7SDavid Rees  // Emboss uses.
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // Because of the use of scanf here, this function accepts hex floating-point
*99e0aae7SDavid Rees  // values (0xh.hhhhpeee) *on some systems*.  TODO(bolms): make hex float
*99e0aae7SDavid Rees  // support universal.
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  using UInt = typename FloatConstants<Float>::MatchingIntegerType;
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  if (token.empty()) return false;
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // First, check for negative.
*99e0aae7SDavid Rees  bool negative = token[0] == '-';
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // Second, check for NaN.
*99e0aae7SDavid Rees  ::std::size_t i = token[0] == '-' || token[0] == '+' ? 1 : 0;
*99e0aae7SDavid Rees  if (token.size() >= i + 3 && (token[i] == 'N' || token[i] == 'n') &&
*99e0aae7SDavid Rees      (token[i + 1] == 'A' || token[i + 1] == 'a') &&
*99e0aae7SDavid Rees      (token[i + 2] == 'N' || token[i + 2] == 'n')) {
*99e0aae7SDavid Rees    UInt nan_payload;
*99e0aae7SDavid Rees    if (token.size() >= i + 4) {
*99e0aae7SDavid Rees      if (token[i + 3] == '(' && token[token.size() - 1] == ')') {
*99e0aae7SDavid Rees        if (!DecodeInteger(token.substr(i + 4, token.size() - i - 5),
*99e0aae7SDavid Rees                           &nan_payload)) {
*99e0aae7SDavid Rees          return false;
*99e0aae7SDavid Rees        }
*99e0aae7SDavid Rees      } else {
*99e0aae7SDavid Rees        // NaN may not be followed by trailing characters other than a
*99e0aae7SDavid Rees        // ()-enclosed payload.
*99e0aae7SDavid Rees        return false;
*99e0aae7SDavid Rees      }
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      // If no specific NaN was given, take a default NaN from the C++ standard
*99e0aae7SDavid Rees      // library.  Technically, a conformant C++ implementation might not have
*99e0aae7SDavid Rees      // quiet_NaN(), but any IEEE754-based implementation should.
*99e0aae7SDavid Rees      //
*99e0aae7SDavid Rees      // It is tempting to just write the default NaN directly into the view and
*99e0aae7SDavid Rees      // return success, but "-NaN" should be have its sign bit set, and there
*99e0aae7SDavid Rees      // is no direct way to set the sign bit of a NaN, so there are fewer code
*99e0aae7SDavid Rees      // paths if we extract the default NaN payload, then use it in the
*99e0aae7SDavid Rees      // reconstruction step, below.
*99e0aae7SDavid Rees      Float default_nan = ::std::numeric_limits<Float>::quiet_NaN();
*99e0aae7SDavid Rees      UInt bits;
*99e0aae7SDavid Rees      ::std::memcpy(&bits, &default_nan, sizeof(bits));
*99e0aae7SDavid Rees      nan_payload = bits & FloatConstants<Float>::kMantissaMask();
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    if (nan_payload == 0) {
*99e0aae7SDavid Rees      // "NaN" with a payload of zero is actually the bit pattern for infinity;
*99e0aae7SDavid Rees      // "NaN(0)" should not be an alias for "Inf".
*99e0aae7SDavid Rees      return false;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    if (nan_payload & (FloatConstants<Float>::kExponentMask() |
*99e0aae7SDavid Rees                       FloatConstants<Float>::kSignMask())) {
*99e0aae7SDavid Rees      // The payload must be small enough to fit in the payload space; it must
*99e0aae7SDavid Rees      // not overflow into the exponent or sign bits.
*99e0aae7SDavid Rees      //
*99e0aae7SDavid Rees      // Note that the DecodeInteger call which decoded the payload will return
*99e0aae7SDavid Rees      // false if the payload would overflow the `UInt` type, so cases like
*99e0aae7SDavid Rees      // "NaN(0x10000000000000000000000000000)" -- which are so big that they no
*99e0aae7SDavid Rees      // longer interfere with the sign or exponent -- are caught above.
*99e0aae7SDavid Rees      return false;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    UInt bits = FloatConstants<Float>::kExponentMask();
*99e0aae7SDavid Rees    bits |= nan_payload;
*99e0aae7SDavid Rees    if (negative) {
*99e0aae7SDavid Rees      bits |= FloatConstants<Float>::kSignMask();
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    ::std::memcpy(result, &bits, sizeof(bits));
*99e0aae7SDavid Rees    return true;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // If the value is not NaN, check for infinity.
*99e0aae7SDavid Rees  if (token.size() >= i + 3 && (token[i] == 'I' || token[i] == 'i') &&
*99e0aae7SDavid Rees      (token[i + 1] == 'N' || token[i + 1] == 'n') &&
*99e0aae7SDavid Rees      (token[i + 2] == 'F' || token[i + 2] == 'f')) {
*99e0aae7SDavid Rees    if (token.size() > i + 3) {
*99e0aae7SDavid Rees      // Infinity must be exactly "Inf" or "-Inf" (case insensitive).  There
*99e0aae7SDavid Rees      // must not be trailing characters.
*99e0aae7SDavid Rees      return false;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    // As with quiet_NaN(), a conforming C++ implementation might not have
*99e0aae7SDavid Rees    // infinity(), but an IEEE 754-based implementation should.
*99e0aae7SDavid Rees    if (negative) {
*99e0aae7SDavid Rees      *result = -::std::numeric_limits<Float>::infinity();
*99e0aae7SDavid Rees      return true;
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      *result = ::std::numeric_limits<Float>::infinity();
*99e0aae7SDavid Rees      return true;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // For non-NaN, non-Inf values, use the C scanf function, mirroring the use of
*99e0aae7SDavid Rees  // printf for writing the value, below.
*99e0aae7SDavid Rees  int chars_used = -1;
*99e0aae7SDavid Rees  if (::std::sscanf(token.c_str(), FloatConstants<Float>::kScanfFormat(),
*99e0aae7SDavid Rees                    result, &chars_used) < 1) {
*99e0aae7SDavid Rees    return false;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  if (chars_used < 0 ||
*99e0aae7SDavid Rees      static_cast</**/ ::std::size_t>(chars_used) < token.size()) {
*99e0aae7SDavid Rees    return false;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  return true;
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
// Decodes a floating-point number from a text stream and writes it to the
// specified view.
//
// Returns false if no token could be read, if DecodeFloat() rejects the token
// (which includes the empty token produced at end of stream), or if
// view->TryToWrite() rejects the decoded value.
template <class Stream, class View>
bool ReadFloatFromTextStream(View *view, Stream *stream) {
  ::std::string token;
  typename View::ValueType value;
  if (ReadToken(stream, &token) && DecodeFloat(token, &value)) {
    return view->TryToWrite(value);
  }
  return false;
}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <class Stream, class Float>
*99e0aae7SDavid Reesvoid WriteFloatToTextStream(Float n, Stream *stream,
*99e0aae7SDavid Rees                            const TextOutputOptions &options) {
*99e0aae7SDavid Rees  static_assert(::std::is_same<Float, float>::value ||
*99e0aae7SDavid Rees                    ::std::is_same<Float, double>::value,
*99e0aae7SDavid Rees                "WriteFloatToTextStream can only write float or double.");
*99e0aae7SDavid Rees  // The state of the world w.r.t. rendering floating-points as decimal text is,
*99e0aae7SDavid Rees  // ca. 2018, less than ideal.
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // In C++ land, there is actually no stable facility in the standard library
*99e0aae7SDavid Rees  // until to_chars() in C++17 -- which is not actually implemented yet in
*99e0aae7SDavid Rees  // libc++.  to_string(), the printf() family, and the iostreams system all
*99e0aae7SDavid Rees  // respect the current locale.  In most programs, the locale is permanently
*99e0aae7SDavid Rees  // left on "C", but this is not guaranteed.  to_string() also uses a fixed and
*99e0aae7SDavid Rees  // rather unfortunate format.
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // For integers, I (bolms@) chose to just implement custom read and write
*99e0aae7SDavid Rees  // routines, but those routines are quite small and straightforward compared
*99e0aae7SDavid Rees  // to floating point conversion.  Even writing correct output is difficult,
*99e0aae7SDavid Rees  // and writing correct and minimal output is the subject of a number of
*99e0aae7SDavid Rees  // academic papers.
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // For the moment, I'm just using snprintf("%.*g", 17, n), which is guaranteed
*99e0aae7SDavid Rees  // to be read back as the same number, but can be longer than strictly
*99e0aae7SDavid Rees  // necessary.
*99e0aae7SDavid Rees  //
*99e0aae7SDavid Rees  // TODO(bolms): Import a modified version of the double-to-string conversion
*99e0aae7SDavid Rees  // from Swift's standard library, which appears to be best implementation
*99e0aae7SDavid Rees  // currently available.
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  if (::std::isnan(n)) {
*99e0aae7SDavid Rees    // The printf format for NaN is just "NaN".  In the interests of keeping
*99e0aae7SDavid Rees    // things bit-exact, Emboss prints the exact NaN.
*99e0aae7SDavid Rees    typename FloatConstants<Float>::MatchingIntegerType bits;
*99e0aae7SDavid Rees    ::std::memcpy(&bits, &n, sizeof(bits));
*99e0aae7SDavid Rees    ::std::uint64_t nan_payload = bits & FloatConstants<Float>::kMantissaMask();
*99e0aae7SDavid Rees    ::std::uint64_t nan_sign = bits & FloatConstants<Float>::kSignMask();
*99e0aae7SDavid Rees    if (nan_sign) {
*99e0aae7SDavid Rees      // NaN still has a sign bit, which is generally treated differently from
*99e0aae7SDavid Rees      // the payload.  There is no real "standard" text format for NaNs, but
*99e0aae7SDavid Rees      // "-NaN" appears to be a common way of indicating a NaN with the sign bit
*99e0aae7SDavid Rees      // set.
*99e0aae7SDavid Rees      stream->Write("-NaN(");
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      stream->Write("NaN(");
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    // NaN payloads are always dumped in hex.  Note that Emboss is treating the
*99e0aae7SDavid Rees    // is_quiet/is_signal bit as just another bit in the payload.
*99e0aae7SDavid Rees    WriteIntegerToTextStream(nan_payload, stream, 16, options.digit_grouping());
*99e0aae7SDavid Rees    stream->Write(")");
*99e0aae7SDavid Rees    return;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  if (::std::isinf(n)) {
*99e0aae7SDavid Rees    if (n < 0.0) {
*99e0aae7SDavid Rees      stream->Write("-Inf");
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      stream->Write("Inf");
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    return;
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // TODO(bolms): Should the current numeric base be honored here?  Should there
*99e0aae7SDavid Rees  // be a separate Float numeric base?
*99e0aae7SDavid Rees  ::std::array<char, 30> buffer;
*99e0aae7SDavid Rees  // TODO(bolms): Figure out how to get ::std::snprintf to work on
*99e0aae7SDavid Rees  // microcontroller builds.
*99e0aae7SDavid Rees  ::std::size_t snprintf_result = static_cast</**/ ::std::size_t>(::snprintf(
*99e0aae7SDavid Rees      &(buffer[0]), buffer.size(), "%.*g",
*99e0aae7SDavid Rees      FloatConstants<Float>::kPrintfPrecision(), static_cast<double>(n)));
*99e0aae7SDavid Rees  (void)snprintf_result;  // Unused if EMBOSS_CHECK_LE is compiled out.
*99e0aae7SDavid Rees  EMBOSS_CHECK_LE(snprintf_result, buffer.size());
*99e0aae7SDavid Rees  stream->Write(&buffer[0]);
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees  // TODO(bolms): Support digit grouping.
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesbool ReadEnumViewFromTextStream(View *view, Stream *stream) {
*99e0aae7SDavid Rees  ::std::string token;
*99e0aae7SDavid Rees  if (!ReadToken(stream, &token)) return false;
*99e0aae7SDavid Rees  if (token.empty()) return false;
*99e0aae7SDavid Rees  if (::std::isdigit(token[0])) {
*99e0aae7SDavid Rees    ::std::uint64_t value;
*99e0aae7SDavid Rees    if (!DecodeInteger(token, &value)) return false;
*99e0aae7SDavid Rees    // TODO(bolms): Fix the static_cast<ValueType> for signed ValueType.
*99e0aae7SDavid Rees    // TODO(bolms): Should values between 2**63 and 2**64-1 actually be
*99e0aae7SDavid Rees    // allowed in the text format when ValueType is signed?
*99e0aae7SDavid Rees    return view->TryToWrite(static_cast<typename View::ValueType>(value));
*99e0aae7SDavid Rees  } else if (token[0] == '-') {
*99e0aae7SDavid Rees    ::std::int64_t value;
*99e0aae7SDavid Rees    if (!DecodeInteger(token, &value)) return false;
*99e0aae7SDavid Rees    return view->TryToWrite(static_cast<typename View::ValueType>(value));
*99e0aae7SDavid Rees  } else {
*99e0aae7SDavid Rees    typename View::ValueType value;
*99e0aae7SDavid Rees    if (!TryToGetEnumFromName(token.c_str(), &value)) return false;
*99e0aae7SDavid Rees    return view->TryToWrite(value);
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <class Stream, class View>
*99e0aae7SDavid Reesvoid WriteEnumViewToTextStream(View *view, Stream *stream,
*99e0aae7SDavid Rees                               const TextOutputOptions &options) {
*99e0aae7SDavid Rees  const char *name = TryToGetNameFromEnum(view->Read());
*99e0aae7SDavid Rees  if (name != nullptr) {
*99e0aae7SDavid Rees    stream->Write(name);
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees  // If the enum value has no known name, then write its numeric value
*99e0aae7SDavid Rees  // instead.  If it does have a known name, and comments are enabled on the
*99e0aae7SDavid Rees  // output, then write the numeric value as a comment.
*99e0aae7SDavid Rees  if (name == nullptr || options.comments()) {
*99e0aae7SDavid Rees    if (name != nullptr) stream->Write("  # ");
*99e0aae7SDavid Rees    WriteIntegerToTextStream(
*99e0aae7SDavid Rees        static_cast<
*99e0aae7SDavid Rees            typename ::std::underlying_type<typename View::ValueType>::type>(
*99e0aae7SDavid Rees            view->Read()),
*99e0aae7SDavid Rees        stream, options.numeric_base(), options.digit_grouping());
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Updates an array from a text stream.  For an array of integers, the most
*99e0aae7SDavid Rees// basic form of the text format looks like:
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// { 0, 1, 2 }
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// However, the following are all acceptable and equivalent:
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// { 0, 1, 2, }
*99e0aae7SDavid Rees// {0 1 2}
*99e0aae7SDavid Rees// { [2]: 2, [1]: 1, [0]: 0 }
*99e0aae7SDavid Rees// {[2]:2, [0]:0, 1}
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// Formally, the array must be contained within braces ("{}").  Elements are
*99e0aae7SDavid Rees// represented as an optional index surrounded by brackets ("[]") followed by
*99e0aae7SDavid Rees// the text format of the element, followed by a single optional comma (",").
*99e0aae7SDavid Rees// If no index is present for the first element, the index 0 will be used.  If
*99e0aae7SDavid Rees// no index is present for any elements after the first, the index one greater
*99e0aae7SDavid Rees// than the previous index will be used.
*99e0aae7SDavid Reestemplate <class Array, class Stream>
*99e0aae7SDavid Reesbool ReadArrayFromTextStream(Array *array, Stream *stream) {
*99e0aae7SDavid Rees  // The text format allows any given index to be set more than once.  In
*99e0aae7SDavid Rees  // theory, this function could track indices and fail if an index were
*99e0aae7SDavid Rees  // double-set, but doing so would require quite a bit of overhead, and
*99e0aae7SDavid Rees  // O(array->ElementCount()) extra space in the worst case.  It does not seem
*99e0aae7SDavid Rees  // worth it to impose the runtime cost here.
*99e0aae7SDavid Rees  ::std::size_t index = 0;
*99e0aae7SDavid Rees  ::std::string brace;
*99e0aae7SDavid Rees  // Read out the opening brace.
*99e0aae7SDavid Rees  if (!ReadToken(stream, &brace)) return false;
*99e0aae7SDavid Rees  if (brace != "{") return false;
*99e0aae7SDavid Rees  for (;;) {
*99e0aae7SDavid Rees    char c;
*99e0aae7SDavid Rees    // Check for a closing brace; if present, success.
*99e0aae7SDavid Rees    if (!DiscardWhitespace(stream)) return false;
*99e0aae7SDavid Rees    if (!stream->Read(&c)) return false;
*99e0aae7SDavid Rees    if (c == '}') return true;
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees    // If the element has an index, read it.
*99e0aae7SDavid Rees    if (c == '[') {
*99e0aae7SDavid Rees      ::std::string index_text;
*99e0aae7SDavid Rees      if (!ReadToken(stream, &index_text)) return false;
*99e0aae7SDavid Rees      if (!::emboss::support::DecodeInteger(index_text, &index)) return false;
*99e0aae7SDavid Rees      ::std::string closing_bracket;
*99e0aae7SDavid Rees      if (!ReadToken(stream, &closing_bracket)) return false;
*99e0aae7SDavid Rees      if (closing_bracket != "]") return false;
*99e0aae7SDavid Rees      ::std::string colon;
*99e0aae7SDavid Rees      if (!ReadToken(stream, &colon)) return false;
*99e0aae7SDavid Rees      if (colon != ":") return false;
*99e0aae7SDavid Rees    } else {
*99e0aae7SDavid Rees      if (!stream->Unread(c)) return false;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees    // Read the element.
*99e0aae7SDavid Rees    if (index >= array->ElementCount()) return false;
*99e0aae7SDavid Rees    if (!(*array)[index].UpdateFromTextStream(stream)) return false;
*99e0aae7SDavid Rees    ++index;
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees    // If there is a trailing comma, discard it.
*99e0aae7SDavid Rees    if (!DiscardWhitespace(stream)) return false;
*99e0aae7SDavid Rees    if (!stream->Read(&c)) return false;
*99e0aae7SDavid Rees    if (c != ',') {
*99e0aae7SDavid Rees      if (c != '}') return false;
*99e0aae7SDavid Rees      if (!stream->Unread(c)) return false;
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Prints out the elements of an 8-bit Int or UInt array as characters.
*99e0aae7SDavid Reestemplate <class Array, class Stream>
*99e0aae7SDavid Reesvoid WriteShorthandAsciiArrayCommentToTextStream(
*99e0aae7SDavid Rees    const Array *array, Stream *stream, const TextOutputOptions &options) {
*99e0aae7SDavid Rees  if (!options.multiline()) return;
*99e0aae7SDavid Rees  if (!options.comments()) return;
*99e0aae7SDavid Rees  if (array->ElementCount() == 0) return;
*99e0aae7SDavid Rees  static constexpr int kCharsPerBlock = 64;
*99e0aae7SDavid Rees  static constexpr char kStandInForNonPrintableChar = '.';
*99e0aae7SDavid Rees  auto start_new_line = [&]() {
*99e0aae7SDavid Rees    stream->Write("\n");
*99e0aae7SDavid Rees    stream->Write(options.current_indent());
*99e0aae7SDavid Rees    stream->Write("# ");
*99e0aae7SDavid Rees  };
*99e0aae7SDavid Rees  for (int i = 0, n = array->ElementCount(); i < n; ++i) {
*99e0aae7SDavid Rees    const int c = (*array)[i].Read();
*99e0aae7SDavid Rees    const bool c_is_printable = (c >= 32 && c <= 126);
*99e0aae7SDavid Rees    const bool starting_new_block = ((i % kCharsPerBlock) == 0);
*99e0aae7SDavid Rees    if (starting_new_block) start_new_line();
*99e0aae7SDavid Rees    stream->Write(c_is_printable ? static_cast<char>(c)
*99e0aae7SDavid Rees                                 : kStandInForNonPrintableChar);
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Writes an array to a text stream.  This writes the array in a format
*99e0aae7SDavid Rees// compatible with ReadArrayFromTextStream, above.  For multiline output, writes
*99e0aae7SDavid Rees// one element per line.
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// TODO(bolms): Make the output for arrays of small elements (like bytes) much
*99e0aae7SDavid Rees// more compact.
*99e0aae7SDavid Rees//
*99e0aae7SDavid Rees// This will require several support functions like `MaxTextLength` on every
*99e0aae7SDavid Rees// view type, and will substantially increase the number of tests required for
*99e0aae7SDavid Rees// this function, but will make arrays of small elements much more readable.
*99e0aae7SDavid Reestemplate <class Array, class Stream>
*99e0aae7SDavid Reesvoid WriteArrayToTextStream(Array *array, Stream *stream,
*99e0aae7SDavid Rees                            const TextOutputOptions &options) {
*99e0aae7SDavid Rees  TextOutputOptions element_options = options.PlusOneIndent();
*99e0aae7SDavid Rees  if (options.multiline()) {
*99e0aae7SDavid Rees    stream->Write("{");
*99e0aae7SDavid Rees    WriteShorthandArrayCommentToTextStream(array, stream, element_options);
*99e0aae7SDavid Rees    for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
*99e0aae7SDavid Rees      if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
*99e0aae7SDavid Rees          (*array)[i].Ok()) {
*99e0aae7SDavid Rees        stream->Write("\n");
*99e0aae7SDavid Rees        stream->Write(element_options.current_indent());
*99e0aae7SDavid Rees        stream->Write("[");
*99e0aae7SDavid Rees        // TODO(bolms): Put padding in here so that array elements start at the
*99e0aae7SDavid Rees        // same column.
*99e0aae7SDavid Rees        //
*99e0aae7SDavid Rees        // TODO(bolms): (Maybe) figure out how to get padding to work so that
*99e0aae7SDavid Rees        // elements with comments can have their comments align to the same
*99e0aae7SDavid Rees        // column.
*99e0aae7SDavid Rees        WriteIntegerToTextStream(i, stream, options.numeric_base(),
*99e0aae7SDavid Rees                                 options.digit_grouping());
*99e0aae7SDavid Rees        stream->Write("]: ");
*99e0aae7SDavid Rees        (*array)[i].WriteToTextStream(stream, element_options);
*99e0aae7SDavid Rees      } else if (element_options.comments()) {
*99e0aae7SDavid Rees        stream->Write("\n");
*99e0aae7SDavid Rees        stream->Write(element_options.current_indent());
*99e0aae7SDavid Rees        stream->Write("# [");
*99e0aae7SDavid Rees        WriteIntegerToTextStream(i, stream, options.numeric_base(),
*99e0aae7SDavid Rees                                 options.digit_grouping());
*99e0aae7SDavid Rees        stream->Write("]: UNREADABLE");
*99e0aae7SDavid Rees      }
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    stream->Write("\n");
*99e0aae7SDavid Rees    stream->Write(options.current_indent());
*99e0aae7SDavid Rees    stream->Write("}");
*99e0aae7SDavid Rees  } else {
*99e0aae7SDavid Rees    stream->Write("{");
*99e0aae7SDavid Rees    bool skipped_unreadable = false;
*99e0aae7SDavid Rees    for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
*99e0aae7SDavid Rees      if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
*99e0aae7SDavid Rees          (*array)[i].Ok()) {
*99e0aae7SDavid Rees        stream->Write(" ");
*99e0aae7SDavid Rees        if (i % 8 == 0 || skipped_unreadable) {
*99e0aae7SDavid Rees          stream->Write("[");
*99e0aae7SDavid Rees          WriteIntegerToTextStream(i, stream, options.numeric_base(),
*99e0aae7SDavid Rees                                   options.digit_grouping());
*99e0aae7SDavid Rees          stream->Write("]: ");
*99e0aae7SDavid Rees        }
*99e0aae7SDavid Rees        (*array)[i].WriteToTextStream(stream, element_options);
*99e0aae7SDavid Rees        if (i < array->ElementCount() - 1) {
*99e0aae7SDavid Rees          stream->Write(",");
*99e0aae7SDavid Rees        }
*99e0aae7SDavid Rees        skipped_unreadable = false;
*99e0aae7SDavid Rees      } else {
*99e0aae7SDavid Rees        if (element_options.comments()) {
*99e0aae7SDavid Rees          stream->Write(" # ");
*99e0aae7SDavid Rees          if (i % 8 == 0) {
*99e0aae7SDavid Rees            stream->Write("[");
*99e0aae7SDavid Rees            WriteIntegerToTextStream(i, stream, options.numeric_base(),
*99e0aae7SDavid Rees                                     options.digit_grouping());
*99e0aae7SDavid Rees            stream->Write("]: ");
*99e0aae7SDavid Rees          }
*99e0aae7SDavid Rees          stream->Write("UNREADABLE\n");
*99e0aae7SDavid Rees        }
*99e0aae7SDavid Rees        skipped_unreadable = true;
*99e0aae7SDavid Rees      }
*99e0aae7SDavid Rees    }
*99e0aae7SDavid Rees    stream->Write(" }");
*99e0aae7SDavid Rees  }
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
// TextStream puts a stream-like interface onto a block of characters, for use
// by UpdateFromTextStream.  It is used by UpdateFromText().
//
// TextStream does not copy the text: it stores only a pointer and a length, so
// the underlying characters must remain valid for as long as the TextStream is
// in use.
class TextStream final {
 public:
  // This template handles std::string, std::string_view, and absl::string_view.
  template <class String>
  inline explicit TextStream(const String &text)
      : text_(text.data()), length_(text.size()) {}

  // Wraps a NUL-terminated string.  `text` must not be null.
  //
  // ::std::strlen is used because <cstring> only guarantees the declaration in
  // namespace std; the global-namespace name is not guaranteed to exist.
  inline explicit TextStream(const char *text)
      : text_(text), length_(::std::strlen(text)) {}

  // Wraps exactly `length` characters starting at `text`.
  inline TextStream(const char *text, ::std::size_t length)
      : text_(text), length_(length) {}

  // Stores the next character in `*result` and advances past it.  Returns
  // false, leaving `*result` untouched, if the end of the text was reached.
  inline bool Read(char *result) {
    if (index_ >= length_) return false;
    *result = text_[index_];
    ++index_;
    return true;
  }

  // Steps back by one character.  Returns false, without moving, if nothing
  // has been read yet or if `c` is not the character immediately before the
  // current position.
  inline bool Unread(char c) {
    if (index_ < 1) return false;
    if (text_[index_ - 1] != c) return false;
    --index_;
    return true;
  }

 private:
  // It would be nice to use string_view here, but that's not available until
  // C++17.
  const char *text_ = nullptr;
  ::std::size_t length_ = 0;
  ::std::size_t index_ = 0;  // Offset of the next character Read() returns.
};
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees}  // namespace support
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// Returns a TextOutputOptions set for reasonable multi-line text output.
*99e0aae7SDavid Reesstatic inline TextOutputOptions MultilineText() {
*99e0aae7SDavid Rees  return TextOutputOptions()
*99e0aae7SDavid Rees      .Multiline(true)
*99e0aae7SDavid Rees      .WithIndent("  ")
*99e0aae7SDavid Rees      .WithComments(true)
*99e0aae7SDavid Rees      .WithDigitGrouping(true);
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees// TODO(bolms): Add corresponding ReadFromText*() verbs which enforce the
*99e0aae7SDavid Rees// constraint that all of a field's dependencies must be present in the text
*99e0aae7SDavid Rees// before the field itself is set.
*99e0aae7SDavid Reestemplate <typename EmbossViewType>
*99e0aae7SDavid Reesinline bool UpdateFromText(const EmbossViewType &view,
*99e0aae7SDavid Rees                           const ::std::string &text) {
*99e0aae7SDavid Rees  auto text_stream = support::TextStream{text};
*99e0aae7SDavid Rees  return view.UpdateFromTextStream(&text_stream);
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <typename EmbossViewType>
*99e0aae7SDavid Reesinline ::std::string WriteToString(const EmbossViewType &view,
*99e0aae7SDavid Rees                                   TextOutputOptions options) {
*99e0aae7SDavid Rees  support::TextOutputStream text_stream;
*99e0aae7SDavid Rees  view.WriteToTextStream(&text_stream, options);
*99e0aae7SDavid Rees  return text_stream.Result();
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Reestemplate <typename EmbossViewType>
*99e0aae7SDavid Reesinline ::std::string WriteToString(const EmbossViewType &view) {
*99e0aae7SDavid Rees  return WriteToString(view, TextOutputOptions());
*99e0aae7SDavid Rees}
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees}  // namespace emboss
*99e0aae7SDavid Rees
*99e0aae7SDavid Rees#endif  // EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_