1*99e0aae7SDavid Rees // Copyright 2019 Google LLC
2*99e0aae7SDavid Rees //
3*99e0aae7SDavid Rees // Licensed under the Apache License, Version 2.0 (the "License");
4*99e0aae7SDavid Rees // you may not use this file except in compliance with the License.
5*99e0aae7SDavid Rees // You may obtain a copy of the License at
6*99e0aae7SDavid Rees //
7*99e0aae7SDavid Rees // https://www.apache.org/licenses/LICENSE-2.0
8*99e0aae7SDavid Rees //
9*99e0aae7SDavid Rees // Unless required by applicable law or agreed to in writing, software
10*99e0aae7SDavid Rees // distributed under the License is distributed on an "AS IS" BASIS,
11*99e0aae7SDavid Rees // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*99e0aae7SDavid Rees // See the License for the specific language governing permissions and
13*99e0aae7SDavid Rees // limitations under the License.
14*99e0aae7SDavid Rees
15*99e0aae7SDavid Rees // This header contains functionality related to Emboss text output.
16*99e0aae7SDavid Rees #ifndef EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
17*99e0aae7SDavid Rees #define EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
18*99e0aae7SDavid Rees
19*99e0aae7SDavid Rees #include <array>
20*99e0aae7SDavid Rees #include <climits>
21*99e0aae7SDavid Rees #include <cmath>
22*99e0aae7SDavid Rees #include <cstdint>
23*99e0aae7SDavid Rees #include <cstdio>
24*99e0aae7SDavid Rees #include <cstring>
25*99e0aae7SDavid Rees #include <limits>
26*99e0aae7SDavid Rees #include <sstream>
27*99e0aae7SDavid Rees #include <string>
28*99e0aae7SDavid Rees #include <vector>
29*99e0aae7SDavid Rees
30*99e0aae7SDavid Rees #include "runtime/cpp/emboss_defines.h"
31*99e0aae7SDavid Rees
32*99e0aae7SDavid Rees namespace emboss {
33*99e0aae7SDavid Rees
// TextOutputOptions are used to configure text output.  Typically, one can just
// use a default TextOutputOptions() (for compact output) or MultilineText()
// (for reasonable formatted output; MultilineText() is not a member of this
// class).
//
// A TextOutputOptions is used as an immutable value: PlusOneIndent(),
// Multiline(), and every With*() method leave *this untouched and return a
// modified copy, so calls can be chained, e.g.
//
//   TextOutputOptions().Multiline(true).WithIndent("  ").WithNumericBase(16)
//
// A default-constructed object has empty indent strings, every boolean option
// off, and a numeric base of 10.
class TextOutputOptions final {
 public:
  TextOutputOptions() = default;

  // Returns a copy whose current indentation has one more copy of indent()
  // appended to it.
  TextOutputOptions PlusOneIndent() const {
    TextOutputOptions result = *this;
    result.current_indent_ += indent_;
    return result;
  }

  // Returns a copy with the multiline flag set to new_value.
  TextOutputOptions Multiline(bool new_value) const {
    TextOutputOptions result = *this;
    result.multiline_ = new_value;
    return result;
  }

  // Returns a copy that uses new_value as the string added by each
  // PlusOneIndent() call.  The current indentation is not changed.
  TextOutputOptions WithIndent(::std::string new_value) const {
    TextOutputOptions result = *this;
    result.indent_ = ::std::move(new_value);
    return result;
  }

  // Returns a copy with the comments flag set to new_value.
  TextOutputOptions WithComments(bool new_value) const {
    TextOutputOptions result = *this;
    result.comments_ = new_value;
    return result;
  }

  // Returns a copy with the digit grouping flag set to new_value.
  TextOutputOptions WithDigitGrouping(bool new_value) const {
    TextOutputOptions result = *this;
    result.digit_grouping_ = new_value;
    return result;
  }

  // Returns a copy with the numeric base set to new_value.  The value is
  // stored as-is; no validation is performed here.
  TextOutputOptions WithNumericBase(::std::uint8_t new_value) const {
    TextOutputOptions result = *this;
    result.numeric_base_ = new_value;
    return result;
  }

  // Returns a copy with the allow-partial-output flag set to new_value.
  TextOutputOptions WithAllowPartialOutput(bool new_value) const {
    TextOutputOptions result = *this;
    result.allow_partial_output_ = new_value;
    return result;
  }

  // Accessors.  The strings are returned by value, so the results stay valid
  // even when called on a temporary TextOutputOptions.
  ::std::string current_indent() const { return current_indent_; }
  ::std::string indent() const { return indent_; }
  bool multiline() const { return multiline_; }
  bool digit_grouping() const { return digit_grouping_; }
  bool comments() const { return comments_; }
  ::std::uint8_t numeric_base() const { return numeric_base_; }
  bool allow_partial_output() const { return allow_partial_output_; }

 private:
  // Text appended to current_indent_ by each PlusOneIndent() call.
  ::std::string indent_;
  // Accumulated indentation for the current nesting level.
  ::std::string current_indent_;
  bool comments_ = false;
  bool multiline_ = false;
  bool digit_grouping_ = false;
  bool allow_partial_output_ = false;
  ::std::uint8_t numeric_base_ = 10;
};
100*99e0aae7SDavid Rees
101*99e0aae7SDavid Rees namespace support {
102*99e0aae7SDavid Rees
// TextOutputStream puts a stream-like interface onto a std::string, for use by
// DumpToTextStream.  It is used by UpdateFromText().
//
// Text is accumulated through the Write() overloads and retrieved, as a copy,
// with Result().
class TextOutputStream final {
 public:
  explicit TextOutputStream() = default;

  // Appends all text.size() bytes of text, including any embedded '\0'.
  void Write(const ::std::string &text) {
    text_.write(text.data(), static_cast</**/ ::std::streamsize>(text.size()));
  }

  // Appends a C-style string.  text must point to a null-terminated string.
  void Write(const char *text) {
    text_.write(text, static_cast</**/ ::std::streamsize>(::std::strlen(text)));
  }

  // Appends a single character.
  void Write(const char c) { text_.put(c); }

  // Returns a copy of everything written so far.  This does not modify the
  // stream, so it may be called on a const TextOutputStream.
  ::std::string Result() const { return text_.str(); }

 private:
  ::std::ostringstream text_;
};
122*99e0aae7SDavid Rees
// DecodeInteger decodes an integer from a string.  This is very similar to the
// many, many existing integer decode routines in the world, except that a) it
// accepts integers in any Emboss format, and b) it can run in environments that
// do not support std::istream or Google's number conversion routines.
//
// Accepted text is, in order:
//
//   * an optional '-' (only when IntType is signed),
//   * an optional "0x"/"0X" (hexadecimal) or "0b"/"0B" (binary) prefix;
//     without a prefix the number is decimal,
//   * one or more digits valid for the base, optionally mixed with '_'
//     separators.  '_' may not be the very first character of the text.
//
// Returns true and stores the value in *result on success.  Returns false,
// leaving *result untouched, if the text is malformed, contains no digits, or
// the value does not fit in IntType.
//
// Ideally, this would be replaced by someone else's code.
template <class IntType>
bool DecodeInteger(const ::std::string &text, IntType *result) {
  IntType accumulator = 0;
  IntType base = 10;
  bool negative = false;
  ::std::size_t offset = 0;
  if (::std::is_signed<IntType>::value && text.size() >= 1 + offset &&
      text[offset] == '-') {
    negative = true;
    offset += 1;
  }
  if (text.size() >= 2 + offset && text[offset] == '0') {
    if (text[offset + 1] == 'x' || text[offset + 1] == 'X') {
      base = 16;
      offset += 2;
    } else if (text[offset + 1] == 'b' || text[offset + 1] == 'B') {
      base = 2;
      offset += 2;
    }
  }
  // "", "0x", "0b", "-", "-0x", and "-0b" are not valid numbers, and neither
  // are strings such as "0x_" or "-_" that have separators but no digits.
  // Tracking whether any digit was seen rejects all of them.
  bool saw_digit = false;
  for (; offset < text.size(); ++offset) {
    const char c = text[offset];
    IntType digit = 0;
    if (c == '_') {
      if (offset == 0) {
        return false;
      }
      continue;
    } else if (c >= '0' && c <= '9') {
      digit = c - '0';
    } else if (c >= 'A' && c <= 'F') {
      digit = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'f') {
      digit = c - 'a' + 10;
    } else {
      return false;
    }
    if (digit >= base) {
      return false;
    }
    saw_digit = true;
    // Negative numbers are accumulated as negative values so that the most
    // negative value of IntType, which has no positive counterpart, can be
    // decoded.  Each bounds check is arranged so that it cannot itself
    // overflow.
    if (negative) {
      if (accumulator <
          (::std::numeric_limits<IntType>::min() + digit) / base) {
        return false;
      }
      accumulator = accumulator * base - digit;
    } else {
      if (accumulator >
          (::std::numeric_limits<IntType>::max() - digit) / base) {
        return false;
      }
      accumulator = accumulator * base + digit;
    }
  }
  if (!saw_digit) return false;
  *result = accumulator;
  return true;
}
188*99e0aae7SDavid Rees
// Consumes whitespace (' ', '\t', '\r', '\n') and '#' comments from stream.  A
// comment runs from '#' up to and including the next '\r' or '\n'.
//
// Returns true if the end of the stream is reached.  Otherwise, the first
// character that is neither whitespace nor part of a comment is handed back
// with stream->Unread(), and the result of that call is returned.
template <class Stream>
bool DiscardWhitespace(Stream *stream) {
  bool inside_comment = false;
  for (;;) {
    char ch;
    if (!stream->Read(&ch)) return true;
    if (ch == '#') inside_comment = true;
    const bool is_line_end = ch == '\r' || ch == '\n';
    // A line ending terminates any comment and is itself whitespace.
    if (is_line_end) inside_comment = false;
    const bool is_blank = is_line_end || ch == ' ' || ch == '\t';
    if (!inside_comment && !is_blank) return stream->Unread(ch);
  }
}
200*99e0aae7SDavid Rees
// Reads the next token from stream into *token, after skipping leading
// whitespace and comments with DiscardWhitespace().
//
// A token is either a single punctuation character (one of ":{}[],") or a run
// of characters that ends at whitespace, '#', punctuation, or the end of the
// stream.  The character that ends a run is handed back with Unread().  At the
// end of the stream *token is set to the empty string.
//
// Returns false, leaving *token untouched, if DiscardWhitespace() or Unread()
// fails; returns true otherwise.
template <class Stream>
bool ReadToken(Stream *stream, ::std::string *token) {
  if (!DiscardWhitespace(stream)) return false;

  char current;
  if (!stream->Read(&current)) {
    token->clear();
    return true;
  }

  const char *const punctuation = ":{}[],";
  const auto is_punctuation = [punctuation](char ch) {
    return strchr(punctuation, ch) != nullptr;
  };
  if (is_punctuation(current)) {
    token->assign(1, current);
    return true;
  }

  // TODO(bolms): Only allow alphanumeric characters here?
  ::std::string text;
  for (;;) {
    text.push_back(current);
    // Running out of input simply ends the token.
    if (!stream->Read(&current)) break;
    const bool ends_token = current == ' ' || current == '\t' ||
                            current == '\n' || current == '\r' ||
                            current == '#' || is_punctuation(current);
    if (ends_token) {
      // The delimiter belongs to whatever follows; give it back.
      if (!stream->Unread(current)) return false;
      break;
    }
  }
  token->swap(text);
  return true;
}
230*99e0aae7SDavid Rees
231*99e0aae7SDavid Rees template <class Stream, class View>
ReadIntegerFromTextStream(View * view,Stream * stream)232*99e0aae7SDavid Rees bool ReadIntegerFromTextStream(View *view, Stream *stream) {
233*99e0aae7SDavid Rees ::std::string token;
234*99e0aae7SDavid Rees if (!::emboss::support::ReadToken(stream, &token)) return false;
235*99e0aae7SDavid Rees if (token.empty()) return false;
236*99e0aae7SDavid Rees typename View::ValueType value;
237*99e0aae7SDavid Rees if (!::emboss::support::DecodeInteger(token, &value)) return false;
238*99e0aae7SDavid Rees return view->TryToWrite(value);
239*99e0aae7SDavid Rees }
240*99e0aae7SDavid Rees
241*99e0aae7SDavid Rees // WriteIntegerToTextStream encodes the given value in base 2, 10, or 16, with
242*99e0aae7SDavid Rees // or without digit group separators ('_'), and then calls stream->Write() with
243*99e0aae7SDavid Rees // a char * argument that is a C-style null-terminated string of the encoded
244*99e0aae7SDavid Rees // number.
245*99e0aae7SDavid Rees //
246*99e0aae7SDavid Rees // As with DecodeInteger, above, it would be nice to be able to replace this
247*99e0aae7SDavid Rees // with someone else's code, but I (bolms@) was unable to find anything in
248*99e0aae7SDavid Rees // standard C++ that would encode numbers in binary, nothing that would add
249*99e0aae7SDavid Rees // digit separators to hex numbers, and nothing that would use '_' for digit
250*99e0aae7SDavid Rees // separators.
251*99e0aae7SDavid Rees template <class Stream, typename IntegralType>
WriteIntegerToTextStream(IntegralType value,Stream * stream,::std::uint8_t base,bool digit_grouping)252*99e0aae7SDavid Rees void WriteIntegerToTextStream(IntegralType value, Stream *stream,
253*99e0aae7SDavid Rees ::std::uint8_t base, bool digit_grouping) {
254*99e0aae7SDavid Rees static_assert(::std::numeric_limits<
255*99e0aae7SDavid Rees typename ::std::remove_cv<IntegralType>::type>::is_integer,
256*99e0aae7SDavid Rees "WriteIntegerToTextStream only supports integer types.");
257*99e0aae7SDavid Rees static_assert(
258*99e0aae7SDavid Rees !::std::is_same<bool,
259*99e0aae7SDavid Rees typename ::std::remove_cv<IntegralType>::type>::value,
260*99e0aae7SDavid Rees "WriteIntegerToTextStream only supports integer types.");
261*99e0aae7SDavid Rees EMBOSS_CHECK(base == 10 || base == 2 || base == 16);
262*99e0aae7SDavid Rees const char *const digits = "0123456789abcdef";
263*99e0aae7SDavid Rees const int grouping = base == 10 ? 3 : base == 16 ? 4 : 8;
264*99e0aae7SDavid Rees // The maximum size 32-bit number is -2**31, which is:
265*99e0aae7SDavid Rees //
266*99e0aae7SDavid Rees // -0b10000000_00000000_00000000_00000000 (38 chars)
267*99e0aae7SDavid Rees // -2_147_483_648 (14 chars)
268*99e0aae7SDavid Rees // -0x8000_0000 (12 chars)
269*99e0aae7SDavid Rees //
270*99e0aae7SDavid Rees // Likewise, the maximum size 8-bit number is -128, which is:
271*99e0aae7SDavid Rees // -0b10000000 (11 chars)
272*99e0aae7SDavid Rees // -128 (4 chars)
273*99e0aae7SDavid Rees // -0x80 (5 chars)
274*99e0aae7SDavid Rees //
275*99e0aae7SDavid Rees // Binary with separators is always the longest value: 9 chars per 8 bits,
276*99e0aae7SDavid Rees // minus 1 char for the '_' that does not appear at the front of the number,
277*99e0aae7SDavid Rees // plus 2 chars for "0b", plus 1 char for '-', plus 1 extra char for the
278*99e0aae7SDavid Rees // trailing '\0', which is (sizeof value) * CHAR_BIT * 9 / 8 - 1 + 2 + 1 + 1.
279*99e0aae7SDavid Rees const int buffer_size = (sizeof value) * CHAR_BIT * 9 / 8 + 3;
280*99e0aae7SDavid Rees char buffer[buffer_size];
281*99e0aae7SDavid Rees buffer[buffer_size - 1] = '\0';
282*99e0aae7SDavid Rees int next_char = buffer_size - 2;
283*99e0aae7SDavid Rees if (value == 0) {
284*99e0aae7SDavid Rees EMBOSS_DCHECK_GE(next_char, 0);
285*99e0aae7SDavid Rees buffer[next_char] = digits[0];
286*99e0aae7SDavid Rees --next_char;
287*99e0aae7SDavid Rees }
288*99e0aae7SDavid Rees int sign = value < 0 ? -1 : 1;
289*99e0aae7SDavid Rees int digit_count = 0;
290*99e0aae7SDavid Rees auto buffer_char = [&](char c) {
291*99e0aae7SDavid Rees EMBOSS_DCHECK_GE(next_char, 0);
292*99e0aae7SDavid Rees buffer[next_char] = c;
293*99e0aae7SDavid Rees --next_char;
294*99e0aae7SDavid Rees };
295*99e0aae7SDavid Rees if (value < 0) {
296*99e0aae7SDavid Rees if (value == ::std::numeric_limits<decltype(value)>::lowest()) {
297*99e0aae7SDavid Rees // The minimum negative two's-complement value has no corresponding
298*99e0aae7SDavid Rees // positive value, so 'value = -value' is not useful in that case.
299*99e0aae7SDavid Rees // Instead, we do some trickery to buffer the lowest-order digit here.
300*99e0aae7SDavid Rees auto digit = -(value + 1) % base + 1;
301*99e0aae7SDavid Rees value = -(value + 1) / base;
302*99e0aae7SDavid Rees if (digit == base) {
303*99e0aae7SDavid Rees digit = 0;
304*99e0aae7SDavid Rees ++value;
305*99e0aae7SDavid Rees }
306*99e0aae7SDavid Rees buffer_char(digits[digit]);
307*99e0aae7SDavid Rees ++digit_count;
308*99e0aae7SDavid Rees } else {
309*99e0aae7SDavid Rees value = -value;
310*99e0aae7SDavid Rees }
311*99e0aae7SDavid Rees }
312*99e0aae7SDavid Rees while (value > 0) {
313*99e0aae7SDavid Rees if (digit_count && digit_count % grouping == 0 && digit_grouping) {
314*99e0aae7SDavid Rees buffer_char('_');
315*99e0aae7SDavid Rees }
316*99e0aae7SDavid Rees buffer_char(digits[value % base]);
317*99e0aae7SDavid Rees value /= base;
318*99e0aae7SDavid Rees ++digit_count;
319*99e0aae7SDavid Rees }
320*99e0aae7SDavid Rees if (base == 16) {
321*99e0aae7SDavid Rees buffer_char('x');
322*99e0aae7SDavid Rees buffer_char('0');
323*99e0aae7SDavid Rees } else if (base == 2) {
324*99e0aae7SDavid Rees buffer_char('b');
325*99e0aae7SDavid Rees buffer_char('0');
326*99e0aae7SDavid Rees }
327*99e0aae7SDavid Rees if (sign < 0) {
328*99e0aae7SDavid Rees buffer_char('-');
329*99e0aae7SDavid Rees }
330*99e0aae7SDavid Rees
331*99e0aae7SDavid Rees stream->Write(buffer + 1 + next_char);
332*99e0aae7SDavid Rees }
333*99e0aae7SDavid Rees
334*99e0aae7SDavid Rees // Writes an integer value in the base given in options, plus an optional
335*99e0aae7SDavid Rees // comment with the same value in a second base. This is used for the common
336*99e0aae7SDavid Rees // output format of IntView, UIntView, and BcdView.
337*99e0aae7SDavid Rees template <class Stream, class View>
WriteIntegerViewToTextStream(View * view,Stream * stream,const TextOutputOptions & options)338*99e0aae7SDavid Rees void WriteIntegerViewToTextStream(View *view, Stream *stream,
339*99e0aae7SDavid Rees const TextOutputOptions &options) {
340*99e0aae7SDavid Rees WriteIntegerToTextStream(view->Read(), stream, options.numeric_base(),
341*99e0aae7SDavid Rees options.digit_grouping());
342*99e0aae7SDavid Rees if (options.comments()) {
343*99e0aae7SDavid Rees stream->Write(" # ");
344*99e0aae7SDavid Rees WriteIntegerToTextStream(view->Read(), stream,
345*99e0aae7SDavid Rees options.numeric_base() == 10 ? 16 : 10,
346*99e0aae7SDavid Rees options.digit_grouping());
347*99e0aae7SDavid Rees }
348*99e0aae7SDavid Rees }
349*99e0aae7SDavid Rees
350*99e0aae7SDavid Rees template <class Stream, class View>
ReadBooleanFromTextStream(View * view,Stream * stream)351*99e0aae7SDavid Rees bool ReadBooleanFromTextStream(View *view, Stream *stream) {
352*99e0aae7SDavid Rees ::std::string token;
353*99e0aae7SDavid Rees if (!::emboss::support::ReadToken(stream, &token)) return false;
354*99e0aae7SDavid Rees if (token == "true") {
355*99e0aae7SDavid Rees return view->TryToWrite(true);
356*99e0aae7SDavid Rees } else if (token == "false") {
357*99e0aae7SDavid Rees return view->TryToWrite(false);
358*99e0aae7SDavid Rees }
359*99e0aae7SDavid Rees // TODO(bolms): Provide a way to get an error message on parse failure.
360*99e0aae7SDavid Rees return false;
361*99e0aae7SDavid Rees }
362*99e0aae7SDavid Rees
363*99e0aae7SDavid Rees // The TextOutputOptions parameter is present so that it can be passed in by
364*99e0aae7SDavid Rees // generated code that uses the same form for WriteBooleanViewToTextStream,
365*99e0aae7SDavid Rees // WriteIntegerViewToTextStream, and WriteEnumViewToTextStream.
366*99e0aae7SDavid Rees template <class Stream, class View>
WriteBooleanViewToTextStream(View * view,Stream * stream,const TextOutputOptions &)367*99e0aae7SDavid Rees void WriteBooleanViewToTextStream(View *view, Stream *stream,
368*99e0aae7SDavid Rees const TextOutputOptions &) {
369*99e0aae7SDavid Rees if (view->Read()) {
370*99e0aae7SDavid Rees stream->Write("true");
371*99e0aae7SDavid Rees } else {
372*99e0aae7SDavid Rees stream->Write("false");
373*99e0aae7SDavid Rees }
374*99e0aae7SDavid Rees }
375*99e0aae7SDavid Rees
// FloatConstants holds various masks for working with IEEE754-compatible
// floating-point values at a bit level.  These are mostly used here to
// implement text format for NaNs, preserving the NaN payload so that the text
// format can (in theory) provide a bit-exact round-trip through the text
// format.
//
// Each specialization provides:
//
//   MatchingIntegerType  - an unsigned integer with the same size as the
//                          floating-point type.
//   kMantissaMask()      - the mantissa (fraction) bits.
//   kExponentMask()      - the exponent bits.
//   kSignMask()          - the sign bit.
//   kPrintfPrecision()   - the printf precision associated with the type.
//   kScanfFormat()       - a scanf format that reads one value of the type
//                          followed by "%n".
//
// Only float and double are specialized; the primary template is left
// undefined.
template <class Float>
struct FloatConstants;

// 32-bit float: 1 sign bit, 8 exponent bits, 23 mantissa bits.
template <>
struct FloatConstants<float> {
  static_assert(sizeof(float) == 4, "Emboss requires 32-bit float.");
  using MatchingIntegerType = ::std::uint32_t;
  static constexpr MatchingIntegerType kMantissaMask() {
    return (static_cast<MatchingIntegerType>(1) << 23) - 1;
  }
  static constexpr MatchingIntegerType kExponentMask() {
    return static_cast<MatchingIntegerType>(0xff) << 23;
  }
  static constexpr MatchingIntegerType kSignMask() {
    return static_cast<MatchingIntegerType>(1) << 31;
  }
  static constexpr int kPrintfPrecision() { return 9; }
  static constexpr const char *kScanfFormat() { return "%f%n"; }
};

// 64-bit double: 1 sign bit, 11 exponent bits, 52 mantissa bits.
template <>
struct FloatConstants<double> {
  static_assert(sizeof(double) == 8, "Emboss requires 64-bit double.");
  using MatchingIntegerType = ::std::uint64_t;
  static constexpr MatchingIntegerType kMantissaMask() {
    return (static_cast<MatchingIntegerType>(1) << 52) - 1;
  }
  static constexpr MatchingIntegerType kExponentMask() {
    return static_cast<MatchingIntegerType>(0x7ff) << 52;
  }
  static constexpr MatchingIntegerType kSignMask() {
    return static_cast<MatchingIntegerType>(1) << 63;
  }
  static constexpr int kPrintfPrecision() { return 17; }
  static constexpr const char *kScanfFormat() { return "%lf%n"; }
};
411*99e0aae7SDavid Rees
412*99e0aae7SDavid Rees // Decodes a floating-point number from text.
413*99e0aae7SDavid Rees template <class Float>
414*99e0aae7SDavid Rees bool DecodeFloat(const ::std::string &token, Float *result) {
415*99e0aae7SDavid Rees // The state of the world for reading floating-point values is somewhat better
416*99e0aae7SDavid Rees // than the situation for writing them, but there are still a few bits that
417*99e0aae7SDavid Rees // are underspecified. This function is the mirror of WriteFloatToTextStream,
418*99e0aae7SDavid Rees // below, so it specifically decodes infinities and NaNs in the formats that
419*99e0aae7SDavid Rees // Emboss uses.
420*99e0aae7SDavid Rees //
421*99e0aae7SDavid Rees // Because of the use of scanf here, this function accepts hex floating-point
422*99e0aae7SDavid Rees // values (0xh.hhhhpeee) *on some systems*. TODO(bolms): make hex float
423*99e0aae7SDavid Rees // support universal.
424*99e0aae7SDavid Rees
425*99e0aae7SDavid Rees using UInt = typename FloatConstants<Float>::MatchingIntegerType;
426*99e0aae7SDavid Rees
427*99e0aae7SDavid Rees if (token.empty()) return false;
428*99e0aae7SDavid Rees
429*99e0aae7SDavid Rees // First, check for negative.
430*99e0aae7SDavid Rees bool negative = token[0] == '-';
431*99e0aae7SDavid Rees
432*99e0aae7SDavid Rees // Second, check for NaN.
433*99e0aae7SDavid Rees ::std::size_t i = token[0] == '-' || token[0] == '+' ? 1 : 0;
434*99e0aae7SDavid Rees if (token.size() >= i + 3 && (token[i] == 'N' || token[i] == 'n') &&
435*99e0aae7SDavid Rees (token[i + 1] == 'A' || token[i + 1] == 'a') &&
436*99e0aae7SDavid Rees (token[i + 2] == 'N' || token[i + 2] == 'n')) {
437*99e0aae7SDavid Rees UInt nan_payload;
438*99e0aae7SDavid Rees if (token.size() >= i + 4) {
439*99e0aae7SDavid Rees if (token[i + 3] == '(' && token[token.size() - 1] == ')') {
440*99e0aae7SDavid Rees if (!DecodeInteger(token.substr(i + 4, token.size() - i - 5),
441*99e0aae7SDavid Rees &nan_payload)) {
442*99e0aae7SDavid Rees return false;
443*99e0aae7SDavid Rees }
444*99e0aae7SDavid Rees } else {
445*99e0aae7SDavid Rees // NaN may not be followed by trailing characters other than a
446*99e0aae7SDavid Rees // ()-enclosed payload.
447*99e0aae7SDavid Rees return false;
448*99e0aae7SDavid Rees }
449*99e0aae7SDavid Rees } else {
450*99e0aae7SDavid Rees // If no specific NaN was given, take a default NaN from the C++ standard
451*99e0aae7SDavid Rees // library. Technically, a conformant C++ implementation might not have
452*99e0aae7SDavid Rees // quiet_NaN(), but any IEEE754-based implementation should.
453*99e0aae7SDavid Rees //
454*99e0aae7SDavid Rees // It is tempting to just write the default NaN directly into the view and
455*99e0aae7SDavid Rees // return success, but "-NaN" should be have its sign bit set, and there
456*99e0aae7SDavid Rees // is no direct way to set the sign bit of a NaN, so there are fewer code
457*99e0aae7SDavid Rees // paths if we extract the default NaN payload, then use it in the
458*99e0aae7SDavid Rees // reconstruction step, below.
459*99e0aae7SDavid Rees Float default_nan = ::std::numeric_limits<Float>::quiet_NaN();
460*99e0aae7SDavid Rees UInt bits;
461*99e0aae7SDavid Rees ::std::memcpy(&bits, &default_nan, sizeof(bits));
462*99e0aae7SDavid Rees nan_payload = bits & FloatConstants<Float>::kMantissaMask();
463*99e0aae7SDavid Rees }
464*99e0aae7SDavid Rees if (nan_payload == 0) {
465*99e0aae7SDavid Rees // "NaN" with a payload of zero is actually the bit pattern for infinity;
466*99e0aae7SDavid Rees // "NaN(0)" should not be an alias for "Inf".
467*99e0aae7SDavid Rees return false;
468*99e0aae7SDavid Rees }
469*99e0aae7SDavid Rees if (nan_payload & (FloatConstants<Float>::kExponentMask() |
470*99e0aae7SDavid Rees FloatConstants<Float>::kSignMask())) {
471*99e0aae7SDavid Rees // The payload must be small enough to fit in the payload space; it must
472*99e0aae7SDavid Rees // not overflow into the exponent or sign bits.
473*99e0aae7SDavid Rees //
474*99e0aae7SDavid Rees // Note that the DecodeInteger call which decoded the payload will return
475*99e0aae7SDavid Rees // false if the payload would overflow the `UInt` type, so cases like
476*99e0aae7SDavid Rees // "NaN(0x10000000000000000000000000000)" -- which are so big that they no
477*99e0aae7SDavid Rees // longer interfere with the sign or exponent -- are caught above.
478*99e0aae7SDavid Rees return false;
479*99e0aae7SDavid Rees }
480*99e0aae7SDavid Rees UInt bits = FloatConstants<Float>::kExponentMask();
481*99e0aae7SDavid Rees bits |= nan_payload;
482*99e0aae7SDavid Rees if (negative) {
483*99e0aae7SDavid Rees bits |= FloatConstants<Float>::kSignMask();
484*99e0aae7SDavid Rees }
485*99e0aae7SDavid Rees ::std::memcpy(result, &bits, sizeof(bits));
486*99e0aae7SDavid Rees return true;
487*99e0aae7SDavid Rees }
488*99e0aae7SDavid Rees
489*99e0aae7SDavid Rees // If the value is not NaN, check for infinity.
490*99e0aae7SDavid Rees if (token.size() >= i + 3 && (token[i] == 'I' || token[i] == 'i') &&
491*99e0aae7SDavid Rees (token[i + 1] == 'N' || token[i + 1] == 'n') &&
492*99e0aae7SDavid Rees (token[i + 2] == 'F' || token[i + 2] == 'f')) {
493*99e0aae7SDavid Rees if (token.size() > i + 3) {
494*99e0aae7SDavid Rees // Infinity must be exactly "Inf" or "-Inf" (case insensitive). There
495*99e0aae7SDavid Rees // must not be trailing characters.
496*99e0aae7SDavid Rees return false;
497*99e0aae7SDavid Rees }
498*99e0aae7SDavid Rees // As with quiet_NaN(), a conforming C++ implementation might not have
499*99e0aae7SDavid Rees // infinity(), but an IEEE 754-based implementation should.
500*99e0aae7SDavid Rees if (negative) {
501*99e0aae7SDavid Rees *result = -::std::numeric_limits<Float>::infinity();
502*99e0aae7SDavid Rees return true;
503*99e0aae7SDavid Rees } else {
504*99e0aae7SDavid Rees *result = ::std::numeric_limits<Float>::infinity();
505*99e0aae7SDavid Rees return true;
506*99e0aae7SDavid Rees }
507*99e0aae7SDavid Rees }
508*99e0aae7SDavid Rees
509*99e0aae7SDavid Rees // For non-NaN, non-Inf values, use the C scanf function, mirroring the use of
510*99e0aae7SDavid Rees // printf for writing the value, below.
511*99e0aae7SDavid Rees int chars_used = -1;
512*99e0aae7SDavid Rees if (::std::sscanf(token.c_str(), FloatConstants<Float>::kScanfFormat(),
513*99e0aae7SDavid Rees result, &chars_used) < 1) {
514*99e0aae7SDavid Rees return false;
515*99e0aae7SDavid Rees }
516*99e0aae7SDavid Rees if (chars_used < 0 ||
517*99e0aae7SDavid Rees static_cast</**/ ::std::size_t>(chars_used) < token.size()) {
518*99e0aae7SDavid Rees return false;
519*99e0aae7SDavid Rees }
520*99e0aae7SDavid Rees return true;
521*99e0aae7SDavid Rees }
522*99e0aae7SDavid Rees
// Decodes a floating-point number from a text stream and writes it to the
// specified view.
//
// Returns false if no token could be read or the token does not decode with
// DecodeFloat(); otherwise returns the result of view->TryToWrite().
template <class Stream, class View>
bool ReadFloatFromTextStream(View *view, Stream *stream) {
  ::std::string token;
  if (ReadToken(stream, &token)) {
    typename View::ValueType value;
    if (DecodeFloat(token, &value)) {
      return view->TryToWrite(value);
    }
  }
  return false;
}
533*99e0aae7SDavid Rees
534*99e0aae7SDavid Rees template <class Stream, class Float>
535*99e0aae7SDavid Rees void WriteFloatToTextStream(Float n, Stream *stream,
536*99e0aae7SDavid Rees const TextOutputOptions &options) {
537*99e0aae7SDavid Rees static_assert(::std::is_same<Float, float>::value ||
538*99e0aae7SDavid Rees ::std::is_same<Float, double>::value,
539*99e0aae7SDavid Rees "WriteFloatToTextStream can only write float or double.");
540*99e0aae7SDavid Rees // The state of the world w.r.t. rendering floating-points as decimal text is,
541*99e0aae7SDavid Rees // ca. 2018, less than ideal.
542*99e0aae7SDavid Rees //
543*99e0aae7SDavid Rees // In C++ land, there is actually no stable facility in the standard library
544*99e0aae7SDavid Rees // until to_chars() in C++17 -- which is not actually implemented yet in
545*99e0aae7SDavid Rees // libc++. to_string(), the printf() family, and the iostreams system all
546*99e0aae7SDavid Rees // respect the current locale. In most programs, the locale is permanently
547*99e0aae7SDavid Rees // left on "C", but this is not guaranteed. to_string() also uses a fixed and
548*99e0aae7SDavid Rees // rather unfortunate format.
549*99e0aae7SDavid Rees //
550*99e0aae7SDavid Rees // For integers, I (bolms@) chose to just implement custom read and write
551*99e0aae7SDavid Rees // routines, but those routines are quite small and straightforward compared
552*99e0aae7SDavid Rees // to floating point conversion. Even writing correct output is difficult,
553*99e0aae7SDavid Rees // and writing correct and minimal output is the subject of a number of
554*99e0aae7SDavid Rees // academic papers.
555*99e0aae7SDavid Rees //
556*99e0aae7SDavid Rees // For the moment, I'm just using snprintf("%.*g", 17, n), which is guaranteed
557*99e0aae7SDavid Rees // to be read back as the same number, but can be longer than strictly
558*99e0aae7SDavid Rees // necessary.
559*99e0aae7SDavid Rees //
560*99e0aae7SDavid Rees // TODO(bolms): Import a modified version of the double-to-string conversion
561*99e0aae7SDavid Rees // from Swift's standard library, which appears to be best implementation
562*99e0aae7SDavid Rees // currently available.
563*99e0aae7SDavid Rees
564*99e0aae7SDavid Rees if (::std::isnan(n)) {
565*99e0aae7SDavid Rees // The printf format for NaN is just "NaN". In the interests of keeping
566*99e0aae7SDavid Rees // things bit-exact, Emboss prints the exact NaN.
567*99e0aae7SDavid Rees typename FloatConstants<Float>::MatchingIntegerType bits;
568*99e0aae7SDavid Rees ::std::memcpy(&bits, &n, sizeof(bits));
569*99e0aae7SDavid Rees ::std::uint64_t nan_payload = bits & FloatConstants<Float>::kMantissaMask();
570*99e0aae7SDavid Rees ::std::uint64_t nan_sign = bits & FloatConstants<Float>::kSignMask();
571*99e0aae7SDavid Rees if (nan_sign) {
572*99e0aae7SDavid Rees // NaN still has a sign bit, which is generally treated differently from
573*99e0aae7SDavid Rees // the payload. There is no real "standard" text format for NaNs, but
574*99e0aae7SDavid Rees // "-NaN" appears to be a common way of indicating a NaN with the sign bit
575*99e0aae7SDavid Rees // set.
576*99e0aae7SDavid Rees stream->Write("-NaN(");
577*99e0aae7SDavid Rees } else {
578*99e0aae7SDavid Rees stream->Write("NaN(");
579*99e0aae7SDavid Rees }
580*99e0aae7SDavid Rees // NaN payloads are always dumped in hex. Note that Emboss is treating the
581*99e0aae7SDavid Rees // is_quiet/is_signal bit as just another bit in the payload.
582*99e0aae7SDavid Rees WriteIntegerToTextStream(nan_payload, stream, 16, options.digit_grouping());
583*99e0aae7SDavid Rees stream->Write(")");
584*99e0aae7SDavid Rees return;
585*99e0aae7SDavid Rees }
586*99e0aae7SDavid Rees
587*99e0aae7SDavid Rees if (::std::isinf(n)) {
588*99e0aae7SDavid Rees if (n < 0.0) {
589*99e0aae7SDavid Rees stream->Write("-Inf");
590*99e0aae7SDavid Rees } else {
591*99e0aae7SDavid Rees stream->Write("Inf");
592*99e0aae7SDavid Rees }
593*99e0aae7SDavid Rees return;
594*99e0aae7SDavid Rees }
595*99e0aae7SDavid Rees
596*99e0aae7SDavid Rees // TODO(bolms): Should the current numeric base be honored here? Should there
597*99e0aae7SDavid Rees // be a separate Float numeric base?
598*99e0aae7SDavid Rees ::std::array<char, 30> buffer;
599*99e0aae7SDavid Rees // TODO(bolms): Figure out how to get ::std::snprintf to work on
600*99e0aae7SDavid Rees // microcontroller builds.
601*99e0aae7SDavid Rees ::std::size_t snprintf_result = static_cast</**/ ::std::size_t>(::snprintf(
602*99e0aae7SDavid Rees &(buffer[0]), buffer.size(), "%.*g",
603*99e0aae7SDavid Rees FloatConstants<Float>::kPrintfPrecision(), static_cast<double>(n)));
604*99e0aae7SDavid Rees (void)snprintf_result; // Unused if EMBOSS_CHECK_LE is compiled out.
605*99e0aae7SDavid Rees EMBOSS_CHECK_LE(snprintf_result, buffer.size());
606*99e0aae7SDavid Rees stream->Write(&buffer[0]);
607*99e0aae7SDavid Rees
608*99e0aae7SDavid Rees // TODO(bolms): Support digit grouping.
609*99e0aae7SDavid Rees }
610*99e0aae7SDavid Rees
611*99e0aae7SDavid Rees template <class Stream, class View>
612*99e0aae7SDavid Rees bool ReadEnumViewFromTextStream(View *view, Stream *stream) {
613*99e0aae7SDavid Rees ::std::string token;
614*99e0aae7SDavid Rees if (!ReadToken(stream, &token)) return false;
615*99e0aae7SDavid Rees if (token.empty()) return false;
616*99e0aae7SDavid Rees if (::std::isdigit(token[0])) {
617*99e0aae7SDavid Rees ::std::uint64_t value;
618*99e0aae7SDavid Rees if (!DecodeInteger(token, &value)) return false;
619*99e0aae7SDavid Rees // TODO(bolms): Fix the static_cast<ValueType> for signed ValueType.
620*99e0aae7SDavid Rees // TODO(bolms): Should values between 2**63 and 2**64-1 actually be
621*99e0aae7SDavid Rees // allowed in the text format when ValueType is signed?
622*99e0aae7SDavid Rees return view->TryToWrite(static_cast<typename View::ValueType>(value));
623*99e0aae7SDavid Rees } else if (token[0] == '-') {
624*99e0aae7SDavid Rees ::std::int64_t value;
625*99e0aae7SDavid Rees if (!DecodeInteger(token, &value)) return false;
626*99e0aae7SDavid Rees return view->TryToWrite(static_cast<typename View::ValueType>(value));
627*99e0aae7SDavid Rees } else {
628*99e0aae7SDavid Rees typename View::ValueType value;
629*99e0aae7SDavid Rees if (!TryToGetEnumFromName(token.c_str(), &value)) return false;
630*99e0aae7SDavid Rees return view->TryToWrite(value);
631*99e0aae7SDavid Rees }
632*99e0aae7SDavid Rees }
633*99e0aae7SDavid Rees
634*99e0aae7SDavid Rees template <class Stream, class View>
635*99e0aae7SDavid Rees void WriteEnumViewToTextStream(View *view, Stream *stream,
636*99e0aae7SDavid Rees const TextOutputOptions &options) {
637*99e0aae7SDavid Rees const char *name = TryToGetNameFromEnum(view->Read());
638*99e0aae7SDavid Rees if (name != nullptr) {
639*99e0aae7SDavid Rees stream->Write(name);
640*99e0aae7SDavid Rees }
641*99e0aae7SDavid Rees // If the enum value has no known name, then write its numeric value
642*99e0aae7SDavid Rees // instead. If it does have a known name, and comments are enabled on the
643*99e0aae7SDavid Rees // output, then write the numeric value as a comment.
644*99e0aae7SDavid Rees if (name == nullptr || options.comments()) {
645*99e0aae7SDavid Rees if (name != nullptr) stream->Write(" # ");
646*99e0aae7SDavid Rees WriteIntegerToTextStream(
647*99e0aae7SDavid Rees static_cast<
648*99e0aae7SDavid Rees typename ::std::underlying_type<typename View::ValueType>::type>(
649*99e0aae7SDavid Rees view->Read()),
650*99e0aae7SDavid Rees stream, options.numeric_base(), options.digit_grouping());
651*99e0aae7SDavid Rees }
652*99e0aae7SDavid Rees }
653*99e0aae7SDavid Rees
654*99e0aae7SDavid Rees // Updates an array from a text stream. For an array of integers, the most
655*99e0aae7SDavid Rees // basic form of the text format looks like:
656*99e0aae7SDavid Rees //
657*99e0aae7SDavid Rees // { 0, 1, 2 }
658*99e0aae7SDavid Rees //
659*99e0aae7SDavid Rees // However, the following are all acceptable and equivalent:
660*99e0aae7SDavid Rees //
661*99e0aae7SDavid Rees // { 0, 1, 2, }
662*99e0aae7SDavid Rees // {0 1 2}
663*99e0aae7SDavid Rees // { [2]: 2, [1]: 1, [0]: 0 }
664*99e0aae7SDavid Rees // {[2]:2, [0]:0, 1}
665*99e0aae7SDavid Rees //
666*99e0aae7SDavid Rees // Formally, the array must be contained within braces ("{}"). Elements are
667*99e0aae7SDavid Rees // represented as an optional index surrounded by brackets ("[]") followed by
668*99e0aae7SDavid Rees // the text format of the element, followed by a single optional comma (",").
669*99e0aae7SDavid Rees // If no index is present for the first element, the index 0 will be used. If
670*99e0aae7SDavid Rees // no index is present for any elements after the first, the index one greater
671*99e0aae7SDavid Rees // than the previous index will be used.
672*99e0aae7SDavid Rees template <class Array, class Stream>
673*99e0aae7SDavid Rees bool ReadArrayFromTextStream(Array *array, Stream *stream) {
674*99e0aae7SDavid Rees // The text format allows any given index to be set more than once. In
675*99e0aae7SDavid Rees // theory, this function could track indices and fail if an index were
676*99e0aae7SDavid Rees // double-set, but doing so would require quite a bit of overhead, and
677*99e0aae7SDavid Rees // O(array->ElementCount()) extra space in the worst case. It does not seem
678*99e0aae7SDavid Rees // worth it to impose the runtime cost here.
679*99e0aae7SDavid Rees ::std::size_t index = 0;
680*99e0aae7SDavid Rees ::std::string brace;
681*99e0aae7SDavid Rees // Read out the opening brace.
682*99e0aae7SDavid Rees if (!ReadToken(stream, &brace)) return false;
683*99e0aae7SDavid Rees if (brace != "{") return false;
684*99e0aae7SDavid Rees for (;;) {
685*99e0aae7SDavid Rees char c;
686*99e0aae7SDavid Rees // Check for a closing brace; if present, success.
687*99e0aae7SDavid Rees if (!DiscardWhitespace(stream)) return false;
688*99e0aae7SDavid Rees if (!stream->Read(&c)) return false;
689*99e0aae7SDavid Rees if (c == '}') return true;
690*99e0aae7SDavid Rees
691*99e0aae7SDavid Rees // If the element has an index, read it.
692*99e0aae7SDavid Rees if (c == '[') {
693*99e0aae7SDavid Rees ::std::string index_text;
694*99e0aae7SDavid Rees if (!ReadToken(stream, &index_text)) return false;
695*99e0aae7SDavid Rees if (!::emboss::support::DecodeInteger(index_text, &index)) return false;
696*99e0aae7SDavid Rees ::std::string closing_bracket;
697*99e0aae7SDavid Rees if (!ReadToken(stream, &closing_bracket)) return false;
698*99e0aae7SDavid Rees if (closing_bracket != "]") return false;
699*99e0aae7SDavid Rees ::std::string colon;
700*99e0aae7SDavid Rees if (!ReadToken(stream, &colon)) return false;
701*99e0aae7SDavid Rees if (colon != ":") return false;
702*99e0aae7SDavid Rees } else {
703*99e0aae7SDavid Rees if (!stream->Unread(c)) return false;
704*99e0aae7SDavid Rees }
705*99e0aae7SDavid Rees
706*99e0aae7SDavid Rees // Read the element.
707*99e0aae7SDavid Rees if (index >= array->ElementCount()) return false;
708*99e0aae7SDavid Rees if (!(*array)[index].UpdateFromTextStream(stream)) return false;
709*99e0aae7SDavid Rees ++index;
710*99e0aae7SDavid Rees
711*99e0aae7SDavid Rees // If there is a trailing comma, discard it.
712*99e0aae7SDavid Rees if (!DiscardWhitespace(stream)) return false;
713*99e0aae7SDavid Rees if (!stream->Read(&c)) return false;
714*99e0aae7SDavid Rees if (c != ',') {
715*99e0aae7SDavid Rees if (c != '}') return false;
716*99e0aae7SDavid Rees if (!stream->Unread(c)) return false;
717*99e0aae7SDavid Rees }
718*99e0aae7SDavid Rees }
719*99e0aae7SDavid Rees }
720*99e0aae7SDavid Rees
721*99e0aae7SDavid Rees // Prints out the elements of an 8-bit Int or UInt array as characters.
722*99e0aae7SDavid Rees template <class Array, class Stream>
723*99e0aae7SDavid Rees void WriteShorthandAsciiArrayCommentToTextStream(
724*99e0aae7SDavid Rees const Array *array, Stream *stream, const TextOutputOptions &options) {
725*99e0aae7SDavid Rees if (!options.multiline()) return;
726*99e0aae7SDavid Rees if (!options.comments()) return;
727*99e0aae7SDavid Rees if (array->ElementCount() == 0) return;
728*99e0aae7SDavid Rees static constexpr int kCharsPerBlock = 64;
729*99e0aae7SDavid Rees static constexpr char kStandInForNonPrintableChar = '.';
730*99e0aae7SDavid Rees auto start_new_line = [&]() {
731*99e0aae7SDavid Rees stream->Write("\n");
732*99e0aae7SDavid Rees stream->Write(options.current_indent());
733*99e0aae7SDavid Rees stream->Write("# ");
734*99e0aae7SDavid Rees };
735*99e0aae7SDavid Rees for (int i = 0, n = array->ElementCount(); i < n; ++i) {
736*99e0aae7SDavid Rees const int c = (*array)[i].Read();
737*99e0aae7SDavid Rees const bool c_is_printable = (c >= 32 && c <= 126);
738*99e0aae7SDavid Rees const bool starting_new_block = ((i % kCharsPerBlock) == 0);
739*99e0aae7SDavid Rees if (starting_new_block) start_new_line();
740*99e0aae7SDavid Rees stream->Write(c_is_printable ? static_cast<char>(c)
741*99e0aae7SDavid Rees : kStandInForNonPrintableChar);
742*99e0aae7SDavid Rees }
743*99e0aae7SDavid Rees }
744*99e0aae7SDavid Rees
745*99e0aae7SDavid Rees // Writes an array to a text stream. This writes the array in a format
746*99e0aae7SDavid Rees // compatible with ReadArrayFromTextStream, above. For multiline output, writes
747*99e0aae7SDavid Rees // one element per line.
748*99e0aae7SDavid Rees //
749*99e0aae7SDavid Rees // TODO(bolms): Make the output for arrays of small elements (like bytes) much
750*99e0aae7SDavid Rees // more compact.
751*99e0aae7SDavid Rees //
752*99e0aae7SDavid Rees // This will require several support functions like `MaxTextLength` on every
753*99e0aae7SDavid Rees // view type, and will substantially increase the number of tests required for
754*99e0aae7SDavid Rees // this function, but will make arrays of small elements much more readable.
755*99e0aae7SDavid Rees template <class Array, class Stream>
756*99e0aae7SDavid Rees void WriteArrayToTextStream(Array *array, Stream *stream,
757*99e0aae7SDavid Rees const TextOutputOptions &options) {
758*99e0aae7SDavid Rees TextOutputOptions element_options = options.PlusOneIndent();
759*99e0aae7SDavid Rees if (options.multiline()) {
760*99e0aae7SDavid Rees stream->Write("{");
761*99e0aae7SDavid Rees WriteShorthandArrayCommentToTextStream(array, stream, element_options);
762*99e0aae7SDavid Rees for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
763*99e0aae7SDavid Rees if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
764*99e0aae7SDavid Rees (*array)[i].Ok()) {
765*99e0aae7SDavid Rees stream->Write("\n");
766*99e0aae7SDavid Rees stream->Write(element_options.current_indent());
767*99e0aae7SDavid Rees stream->Write("[");
768*99e0aae7SDavid Rees // TODO(bolms): Put padding in here so that array elements start at the
769*99e0aae7SDavid Rees // same column.
770*99e0aae7SDavid Rees //
771*99e0aae7SDavid Rees // TODO(bolms): (Maybe) figure out how to get padding to work so that
772*99e0aae7SDavid Rees // elements with comments can have their comments align to the same
773*99e0aae7SDavid Rees // column.
774*99e0aae7SDavid Rees WriteIntegerToTextStream(i, stream, options.numeric_base(),
775*99e0aae7SDavid Rees options.digit_grouping());
776*99e0aae7SDavid Rees stream->Write("]: ");
777*99e0aae7SDavid Rees (*array)[i].WriteToTextStream(stream, element_options);
778*99e0aae7SDavid Rees } else if (element_options.comments()) {
779*99e0aae7SDavid Rees stream->Write("\n");
780*99e0aae7SDavid Rees stream->Write(element_options.current_indent());
781*99e0aae7SDavid Rees stream->Write("# [");
782*99e0aae7SDavid Rees WriteIntegerToTextStream(i, stream, options.numeric_base(),
783*99e0aae7SDavid Rees options.digit_grouping());
784*99e0aae7SDavid Rees stream->Write("]: UNREADABLE");
785*99e0aae7SDavid Rees }
786*99e0aae7SDavid Rees }
787*99e0aae7SDavid Rees stream->Write("\n");
788*99e0aae7SDavid Rees stream->Write(options.current_indent());
789*99e0aae7SDavid Rees stream->Write("}");
790*99e0aae7SDavid Rees } else {
791*99e0aae7SDavid Rees stream->Write("{");
792*99e0aae7SDavid Rees bool skipped_unreadable = false;
793*99e0aae7SDavid Rees for (::std::size_t i = 0; i < array->ElementCount(); ++i) {
794*99e0aae7SDavid Rees if (!options.allow_partial_output() || (*array)[i].IsAggregate() ||
795*99e0aae7SDavid Rees (*array)[i].Ok()) {
796*99e0aae7SDavid Rees stream->Write(" ");
797*99e0aae7SDavid Rees if (i % 8 == 0 || skipped_unreadable) {
798*99e0aae7SDavid Rees stream->Write("[");
799*99e0aae7SDavid Rees WriteIntegerToTextStream(i, stream, options.numeric_base(),
800*99e0aae7SDavid Rees options.digit_grouping());
801*99e0aae7SDavid Rees stream->Write("]: ");
802*99e0aae7SDavid Rees }
803*99e0aae7SDavid Rees (*array)[i].WriteToTextStream(stream, element_options);
804*99e0aae7SDavid Rees if (i < array->ElementCount() - 1) {
805*99e0aae7SDavid Rees stream->Write(",");
806*99e0aae7SDavid Rees }
807*99e0aae7SDavid Rees skipped_unreadable = false;
808*99e0aae7SDavid Rees } else {
809*99e0aae7SDavid Rees if (element_options.comments()) {
810*99e0aae7SDavid Rees stream->Write(" # ");
811*99e0aae7SDavid Rees if (i % 8 == 0) {
812*99e0aae7SDavid Rees stream->Write("[");
813*99e0aae7SDavid Rees WriteIntegerToTextStream(i, stream, options.numeric_base(),
814*99e0aae7SDavid Rees options.digit_grouping());
815*99e0aae7SDavid Rees stream->Write("]: ");
816*99e0aae7SDavid Rees }
817*99e0aae7SDavid Rees stream->Write("UNREADABLE\n");
818*99e0aae7SDavid Rees }
819*99e0aae7SDavid Rees skipped_unreadable = true;
820*99e0aae7SDavid Rees }
821*99e0aae7SDavid Rees }
822*99e0aae7SDavid Rees stream->Write(" }");
823*99e0aae7SDavid Rees }
824*99e0aae7SDavid Rees }
825*99e0aae7SDavid Rees
// TextStream puts a stream-like interface onto a std::string, for use by
// UpdateFromTextStream.  It is used by UpdateFromText().
//
// TextStream does not copy the text: it stores only a pointer and a length, so
// the underlying characters must outlive the TextStream.
class TextStream final {
 public:
  // This template handles std::string, std::string_view, and absl::string_view.
  template <class String>
  inline explicit TextStream(const String &text)
      : text_(text.data()), length_(text.size()) {}

  // Wraps a NUL-terminated C string.  A null pointer is treated as empty text
  // rather than being passed to strlen(), which would be undefined behavior.
  inline explicit TextStream(const char *text)
      : text_(text), length_(text == nullptr ? 0 : ::std::strlen(text)) {}

  // Wraps the first `length` characters starting at `text`.
  inline TextStream(const char *text, ::std::size_t length)
      : text_(text), length_(length) {}

  // Stores the next character in `*result` and advances past it.  Returns
  // false, leaving `*result` untouched, once all of the text has been read.
  inline bool Read(char *result) {
    if (index_ >= length_) return false;
    *result = text_[index_];
    ++index_;
    return true;
  }

  // Steps back one character.  Returns false, without moving, if nothing has
  // been read yet or if `c` is not the character that was most recently read.
  inline bool Unread(char c) {
    if (index_ < 1) return false;
    if (text_[index_ - 1] != c) return false;
    --index_;
    return true;
  }

 private:
  // It would be nice to use string_view here, but that's not available until
  // C++17.
  const char *text_ = nullptr;
  ::std::size_t length_ = 0;
  ::std::size_t index_ = 0;
};
862*99e0aae7SDavid Rees
863*99e0aae7SDavid Rees } // namespace support
864*99e0aae7SDavid Rees
865*99e0aae7SDavid Rees // Returns a TextOutputOptions set for reasonable multi-line text output.
866*99e0aae7SDavid Rees static inline TextOutputOptions MultilineText() {
867*99e0aae7SDavid Rees return TextOutputOptions()
868*99e0aae7SDavid Rees .Multiline(true)
869*99e0aae7SDavid Rees .WithIndent(" ")
870*99e0aae7SDavid Rees .WithComments(true)
871*99e0aae7SDavid Rees .WithDigitGrouping(true);
872*99e0aae7SDavid Rees }
873*99e0aae7SDavid Rees
874*99e0aae7SDavid Rees // TODO(bolms): Add corresponding ReadFromText*() verbs which enforce the
875*99e0aae7SDavid Rees // constraint that all of a field's dependencies must be present in the text
876*99e0aae7SDavid Rees // before the field itself is set.
877*99e0aae7SDavid Rees template <typename EmbossViewType>
878*99e0aae7SDavid Rees inline bool UpdateFromText(const EmbossViewType &view,
879*99e0aae7SDavid Rees const ::std::string &text) {
880*99e0aae7SDavid Rees auto text_stream = support::TextStream{text};
881*99e0aae7SDavid Rees return view.UpdateFromTextStream(&text_stream);
882*99e0aae7SDavid Rees }
883*99e0aae7SDavid Rees
884*99e0aae7SDavid Rees template <typename EmbossViewType>
885*99e0aae7SDavid Rees inline ::std::string WriteToString(const EmbossViewType &view,
886*99e0aae7SDavid Rees TextOutputOptions options) {
887*99e0aae7SDavid Rees support::TextOutputStream text_stream;
888*99e0aae7SDavid Rees view.WriteToTextStream(&text_stream, options);
889*99e0aae7SDavid Rees return text_stream.Result();
890*99e0aae7SDavid Rees }
891*99e0aae7SDavid Rees
892*99e0aae7SDavid Rees template <typename EmbossViewType>
893*99e0aae7SDavid Rees inline ::std::string WriteToString(const EmbossViewType &view) {
894*99e0aae7SDavid Rees return WriteToString(view, TextOutputOptions());
895*99e0aae7SDavid Rees }
896*99e0aae7SDavid Rees
897*99e0aae7SDavid Rees } // namespace emboss
898*99e0aae7SDavid Rees
899*99e0aae7SDavid Rees #endif // EMBOSS_RUNTIME_CPP_EMBOSS_TEXT_UTIL_H_
900